#!/usr/bin/perl # Text data processing toolkit for Biomolecular NMR spectroscopy # Copyright (C) 2009 Evgeny A. Fadeev , R.T. Clubb Lab # & Regents of the University of California # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . BEGIN { my $email='evgeny.fadeev@gmail.com'; my $name='Evgeny'; # you may adjust the number below to lower # and try if it works, or maybe debug the # script for older versions of perl interpreter if ($] < 5.008001) { print "\n\tThis script requires Perl version 5.8.1\n"; print "\tor higher. Please let me know if you want to\n"; print "\trun it on older versions. Thanks.\n\n"; print "\t$name, $email\n\n"; exit; } # $SIG{__DIE__} = sub { app::die(join(' ', @_));exit; }; } use strict; # put external location of bio perl libraries #use lib '/Users/fadeev/bin/PERL_LIBRARIES/lib/perl5/site_perl/5.8.6/darwin-thread-multi-2level'; package toolkit; #setups #-- %user_nmr_dir --------------------------------------- #hash table for list of user directories with nmr data #it is assumed that directory structure is flat and bruker-like, i.e. #all datasets are in one directories and within each dataset directory #are actual experiment directories (which have to have numeric names) #if one user keeps experiments in more then one place then values of hashtable #must be references to array that contains #if data location is bruker standard, then directory name must be given as '' #if data location is non-standard, then directory must be full qualifying path (~'s allowed) #to directory where datasets are located my %user_nmr_dir = {}; # example %user_nmr_dir = (user1=>['dir1','dir2], # user2=>'user2dir'); #------------------------------------------------------- #global constants my $programmer = 'fadeev'; my $PROGRAMMER_EMAIL = 'evgeny.fadeev@gmail.com'; my $ERROR_REPORTING = 0;#true or false - submit error to a my $ERROR_DIR = undef;#this directory must be write-accessible for everyone my $COMMAND = "$0 " . join(' ',@ARGV); my @FILES;#array for files open by the program #these three are for matching residue label from Sparky #first will match labels like Y24 second for r1+5 third for like k11 #pseudoassigned type of labels are good for labeling unassigned spin systems #that already have relative sequential assignments #NOTE: matching in second will match those matched by third, too my $ASSIGNED_FORMAT = '^([AYMLCGRNDQEHKFPSTI])(\d+)$'; my $PSEUDOASSIGNED_FORMAT = '^([a-z]+\d+)([+-]\d+)?$'; my $PSEUDOASSIGNED_ROOT_FORMAT = '^([a-z]+\d+)([+-]0)?$'; my $ENDOFLINE = '(?:\012|\015|\015\012)'; my $NUMPATTERN = '[+-]?(?:(?:[1-9]\d*(?:\.\d+)?)|0|(?:0?\.\d+))(?:[eE][+-]?\d+)?';#pattern matching numbers my $NAME_RG ='(name\s+[a-zA-Z]{1,2}[#%*\d]{0,2})';#for xplor package my $NAMES_RG = '($\s+'.$NAME_RG.'\s+(?:or '.$NAME_RG.')*\s+$)';#for xplor package my %AA = (ALA=>'A',TYR=>'Y',MET=>'M',LEU=>'L',CYS=>'C',GLY=>'G', ARG=>'R',ASN=>'N',ASP=>'D',GLN=>'Q',GLU=>'E',HIS=>'H',TRP=>'W', LYS=>'K',PHE=>'F',PRO=>'P',SER=>'S',THR=>'T',ILE=>'I',VAL=>'V', 'GUA'=>'g','ADE'=>'a','CYT'=>'c','THY'=>'t'); my $shift_msg = "-shifts argument is required (sparky res. list)" ; my $searchp_msg = "\n\t-searchp [-tol ] -res -files \n\n". "\tfind peaks from picked peaks in sparky .save files\n\n". "\t optional tolerance for resonance search\n". "\tdefault 0.01 ppm\n\t - resonance position\n". "\t specta .save file to be searched\n". "\tdefault - './*.save'\n\n"; #this hash is the main switchboard for this program #the key is what comes from the first argument in @ARGV #the value is a hash that contains the following elements: #fun => reference to the corresponding function #par => par list of parameters (explained in detail below) #synopsis => short description of the command # #par: #hash that describes parameters, their types and default values #this hash has three possible keys (that stand for parameter types) #default - unnamed parameter (may be required or optional) #required - required named parameter #optional - optional named parameter #hardcoded - hash of additional function arguments (optional) # these arguments will override any user set parameter # in case if names coincide # #keys 'default', 'required' and 'optional' point to hash tables #that describe corresponding parameters (but structures of all three #hash tables are different because these three types of parameters #have different behavior # #'default' hash: # key value #'description' string of text describing the meaning of parameter #'default' default value (used only if this parameter is not required) # that is if this element of hash table is not set # then unnamed parameter will be expected from input # or else program should abort with error # #'required' hash: #keys - parameter names #values - their description # #'optional' hash: #keys - parameter names #values - hash tables with description of individual parameters #that nested hash has the following structure: # key value # 'description' description of the parameter # 'type'(optional) 'flag'|'parameter' (the latter is default) # 'default' default value of the parameter # use File::Basename; my $prog = basename $0; my %CAT = ( 'internal'=> { 'synopsis'=>'under construction', 'priority'=>-1 }, 'spec_analysis'=>{ 'synopsis'=>'general analysis of spectral data', 'priority'=>1 }, 'noe'=> { 'synopsis'=>'NOE processing and analysis', 'priority'=>1.1 }, 'other'=>{ 'synopsis'=>'other functions', 'priority'=>5 }, 'data_proc'=>{ 'synopsis'=>'spectrometer data processing', 'priority'=>2 }, 'data_IO'=>{ 'synopsis'=>'Data converters, adapters to other software.', 'priority'=>3 }, 'DATA'=>{ 'synopsis'=>'Export and import of data for analysis by this program', 'priority'=>3.5 }, 'exp_setup'=>{ 'synopsis'=>'experiment setup (Bruker)', 'priority'=>4 }, 'help'=> { 'synopsis'=>'help', 'priority'=>99999999 } ); my %fun = ( '-help'=> { 'category'=>'help', 'synopsis'=>"Get help for available tools and general ". "notes on program usage", 'fun'=>$toolkit::{help}, 'par'=>{ 'optional'=>{ 'function'=>{ 'description'=>'function to retrieve help information about', 'type'=>'flag', 'default'=>'false' } } }, 'help'=>"GET LIST OF FUNCTIONS: \"$prog | more\"\n". "\nUSE A PARTICULAR FUNCTION:\"$prog \"". "\n can be any one given in the list of functions,\n". " can be given in any order ". "but it is important that is a first argument to \"$prog\"\n". "\nGET HELP ON FUNCTION: \"$prog -help\" or ". "\"$prog -help \" (example \"$prog -conv -help\")". "\n\nGENERAL NOTES: Some function parameters will require ". "additional values, while others won't - all that is ". "explained in help records particular to each function. ". "Both function names and parameter names start with \"-\"". "for example: $prog -conv -axisorder 132" }, -pdbseg=> { 'fun'=>$toolkit::{pdbseg}, 'category'=>'other', 'synopsis'=>'show segments in pdb files or renumber residues and rename segments', 'par'=>{ 'required'=> { -pdb=>'list of pdb files' }, 'optional'=> { -list=>{ 'default'=>'false', 'type'=>'flag', 'description'=>'list available segments and residue number ranges' }, -to=>{ 'default'=>undef, 'description'=>'target segment name for renaming other segments to', }, -from=>{ 'default'=>undef, 'description'=>'segment names to be renamed and residue number offsets '. 'that needs to be applied for renumbering (example: -to DNA -from dnaA '. '100 -from dnaB 120. In this example segments dnaA and dnaB will '. 'be renamed to segment dna and residue numbers of segment dnaA will be '. 'increased by 100 and dnaB - by 120)' }, -dir=>{ 'default'=>undef, 'description'=>'directory to store output files, will be created '. 'if it does not exist, files will be overwritten', } } }, 'help'=>'if -list is used then only listing of available segments will be shown, '. 'no renumbering of residues will be done. '."\n". 'NOTE: pdb files are not validated by this subroutine, and multimodel (bundle) '. 'pdb files are not supported at this point.' }, -debug=> { 'fun'=>$toolkit::{debug}, 'category'=>'internal', 'par'=>{ 'default'=>{ alias=>'-p0', default=>undef }, 'optional'=> { -p1=>{ 'default'=>undef }, -p2=>{ 'default'=>undef } } }, 'help'=>'function for commandline driven testing' }, -pt2spec=>{ 'category'=>'data_proc', 'fun'=>$toolkit::{pt2spec}, 'synopsis'=>'convert text dump of 1D nmr experiment from pipe2txt.tcl to '. 'text file with first column - chemical shift and second - intensity', 'par'=>{ 'required'=>{ -in=>'input data table with two columns take out of result of pipe2txt.tcl script', -sw=>'spectrum width, Hz or ppm (e.g. \'-sw 100hz\' means 100Hz, \'-sw 100ppm\' means 100ppm)', }, 'optional'=>{ -obs=>{ 'description'=>'NMR carries frequency, MHz. Required, if -sw is given in Hz', 'default'=>undef }, -first=>{ 'description'=>'ppm coordinate of the first point (use either -first or -cen)', 'default'=>undef }, -cen=>{ 'description'=>'ppm coordinate of the center', 'default'=>undef }, } } }, -txt2rdc=> { 'fun'=>$toolkit::{txt2rdc}, 'synopsis'=>'convert three column numeric table with scalar couplings to XPLOR rdc '. 'restraint table (see -help for more options)', 'category'=>'data_IO', 'par'=>{ 'required'=>{ -names=>'XPLOR names of two atoms involved in the dipolar coupling', -in=>'input table (first column - residue number, second column - '. 'isotropic coupling, third - aligned medium coupling '. '(use -neg if columns 2&3 are switched)' }, 'optional'=> { -offset=>{ 'description'=>'number that needs to be added to numbers in column 1 to '. 'reflect the residue number corresponding to the RDC', 'default'=>0 }, -neg=>{ 'description'=>'flag, if selected then sign of the calculated RDC will be switched'. 'Use it if columns 2 and 3 are switched in the input file', 'default'=>'false', 'type'=>'flag', }, -class=>{ 'description'=>'name of the restraint class', 'default'=>undef, }, -err=>{ 'description'=>'coupling error to use in the XPLOR statements', 'default'=>0.2 }, -scale=>{ 'description'=>'type of RDC to use for conversion to amid scale (HACA|NCO|CACO)', 'default'=>undef }, -pales=>{ 'description'=>'output rdc table in PALES format', 'default'=>'false', 'type'=>'flag' }, -seq=>{ 'description'=>'protein sequence file, one-letter format', 'default'=>undef } } }, 'help'=>'' }, -xeasy2ass=>{ 'synopsis'=>'xeasy (ass. + sequence) --> pipp assignment table', 'category'=>'data_IO', 'par'=>{ 'required'=>{ '-seq'=>'xeasy sequence file', '-prot'=>'xeasy assignment table', }, }, 'help'=>'', 'fun'=>$toolkit::{xeasy2ass} }, -xeasy2pk=>{ 'synopsis'=>'xeasy (peak + ass. + sequence) --> pipp or sparky peaklist', 'category'=>'internal', 'futurecategory'=>'data_IO', 'par'=>{ 'required'=>{ '-seq'=>'xeasy sequence file', '-prot'=>'xeasy assignment table', '-peak'=>'xeasy peak file', '-fmt'=>'output peak table format: pipp|sparky' }, 'optional'=>{ '-out'=>{ 'description'=>'output file name', 'default'=>undef } } }, 'help'=>'', 'fun'=>$toolkit::{xeasy2pk} }, -shiftmap=>{ 'synopsis'=>'Generate pdb file with chemical shift perturbation map stored in the b-factors', 'category'=>'other', 'par'=>{ 'required'=>{ '-pdb'=>'path to pdb file', '-ppm1'=>'first assignment table', '-ppm2'=>'second assignment table', '-fmt'=>'format of both (or just first) assignment tables: pipp|sparky', '-nom'=>'atom naming convention for both (or just first) assignment tables', '-out'=>'output pdb file' }, 'optional'=>{ '-fmt2'=>{ 'description'=>'format of second assigment table, '. 'needed if different from -fmt', 'default'=>undef }, '-nom2'=>{ 'description'=>'atom naming convention of second assigment table, '. 'needed if different from -nom', 'default'=>undef }, '-pdbnom2'=>{ 'description'=>'atom naming convention in pdb file if other then '. 'standard pdb', 'default'=>'pdb' }, '-atoms'=>{ 'description'=>'regular expression for atoms to be included into the map', 'default'=>'H' }, } }, 'help'=>'Differences are calculated as ppm2 - ppm1; ' . 'open output file in molmol and use b-factors to draw color-coded '. 'chemical shift perturbation map; if assignment tables are in different formats '. '-fmt must be used for -ppm1 and -fmt2 for -ppm2', 'fun'=>$toolkit::{shiftmap} }, -ambigrest=>{ 'synopsis'=>'turn stereospecific XPLOR distance restraints into ambiguous and merge related restraints', 'category'=>'noe', 'help'=>'weaker distance restraint will be retained of the two or more related restraints', 'par'=>{ 'required'=>{ '-seq'=>'protein aminoacid sequence file (one letter per aminoacid)', '-tbl'=>'XPLOR restraint files' } }, 'fun'=>$toolkit::{ambiguate_xplor_distance_restraints} }, -dist2xplor=>{ 'synopsis'=>'convert molmol distance calculation file to xplor distance restraints', 'category'=>'noe', 'help'=>'in molmol select atoms, then do CalcDist, save result in file, then run this command', 'par'=>{ 'required'=>{ '-in'=>'result of distance calculation by molmol' } }, 'fun'=>$toolkit::{dist2xplor} }, -noe2txt=>{ 'synopsis'=>'convert XPLOR distance restraint file to human readable table', 'category'=>'noe', 'help'=>'', 'par'=>{ 'required'=>{ '-tbl'=>'XPLOR restraint files', '-seq'=>'protein sequence file (one letter format)' } }, 'fun'=>$toolkit::{noe2txt} }, '-restsummary'=>{ 'synopsis'=>'prepare restraint summary image for protein as in Fig.8 of 1998 IUPAC ' .'recommendation JMB 280(5), 933-952, 1998', 'category'=>'noe', 'help'=>'prints postscript output to screen, use %cmd > save_file syntax', 'par'=>{ 'required'=>{ '-noe'=>'noe restraint files in XPLOR format', '-seq'=>'protein sequence', '-ass'=>'assignment table', '-assfmt'=>'format of assignment table', '-assnom'=>'atom nomenclature used in the assignment table' }, 'optional'=>{ '-hnha'=>{ 'description'=>'hnha vicinal J coupling restraint files in XPLOR format', 'default'=>undef, }, } }, 'fun'=>$toolkit::{calc_restraint_report} }, -countnoe=>{ 'synopsis'=>'read XPLOR noe restraint files and report number of restraints '. 'by category (intraresidue, sequential, etc.)', 'category'=>'noe', 'help'=>'', 'par'=>{ 'required'=>{ '-tbl'=>'XPLOR restraint files' }, 'optional'=>{ '-dir'=>{ 'description'=>'directory for output of sorted restraints', 'default'=>undef } } }, 'fun'=>$toolkit::{count_noe_restraints} }, -showrest=>{ 'synopsis'=>'extract noe restraints within certain residue range', 'category'=>'noe', 'help'=>'', 'par'=>{ 'required'=>{ '-in'=>'NOE restraint file(s)', '-res'=>'two residue ranges, e.g 17 45-52 or 1-55 1-55' } }, 'fun'=>$toolkit::{showrest} }, -calcpck=>{ 'synopsis'=>'calculate PIPP PCK file for expected 3D-edited-NOESY peaks (COVALENT, '. 'INTERMOLECULAR, or INTERMOLECULAR) based on assignment table and structure models', 'category'=>'noe', 'help'=> 'FUNCTION BEHAVIOR: If flag -covalent is used, peaks for atoms separated by <4 '. 'bonds will be predicted, to calculate intramolecular peaks -covalent and -ass2 should '. 'not be used, '. 'and finally, to calculate intermolecular peaks, both -ass1 and -ass2 parameters must be used '. '(and -covalent flag cannot be used). '."\n". 'HINT: if assignment table for both molecules is in one file '. 'and you still want '. 'to predict intermolecular peaks, then use the same file as value for -ass1 and -ass2 and use '. " -res1 and -res2 to specify which residues belong to one and the other molecule\n". 'PARAMETERS: there are many options for this function. They can be grouped in three sets: (1) output '. 'PCK file parameters: -fold, -ref, -sw, -hetnuc, -axisorder; (2) ass table parameters for '. 'first molecule -ass1 '. '-res1 -assnom1; (3) ass table parameters for second molecule same as (2) but with 2 instead of 1; '. '(4) structure file related parameters -pdb, -pdbnom; '. '(5) flags modifying function behavior -covalent'."\n". 'ATOM NAMES: applies to -assnom1 and -assnom2 parameters. If these parameters are used, then atom '. 'names in the assignment table will be checked against a table of allowed names. Errors (if found) '. 'will be reported and program terminated. '. 'There are cases where nomenclature does not describe the molecule. '. 'For example, currently there is no nomenclature support for RNA and DNA atoms. '. 'In those cases use "none" as a value for -assnom1 or -assnom2, but you must make sure that '. 'atom names atom names match exactly atom names in the pdb files so that peaks could be predicted '. 'based on structures. Sticking to xplor nomenclature is helpful if you are using XPLOR generated '. 'pdb models.', 'par'=>{ 'required'=>{ -ass1=>'first assignment table', -hetnuc=>'heteronucleus used in the spectrum (options: N|C)', -pdb=>'PDB file(s) with structural models' }, 'optional'=>{ -cutoff=>{ 'description'=>'distance cutoff for noe calculation, Angstroms (default, 5.0A)', 'default'=>5.0 }, -pdbnom=>{ 'description'=>'atom name convention in pdb files (xplor|pdb). Default is "xplor"', 'default'=>'xplor' }, -ass2=>{ 'description'=>'second assignment table', 'default'=>undef }, -res1=>{ 'description'=>'residue range in first molecule to use for peak '. 'calculation (e.g 1-50,69,88). Default - selects all residues', 'default'=>undef }, -res2=>{ 'description'=>'residue range in second molecule to use for peak calculation. '. 'Default - select all residues', 'default'=>undef }, -assfmt1=>{ 'description'=>'format of first assignment table (pipp|sparky). Default is "pipp"', 'default'=>'pipp' }, -assfmt2=>{ 'description'=>'format of second assignment table (pipp|sparky). Default is "pipp"', 'default'=>'pipp' }, -assnom1=>{ 'description'=>'atom name nomenclature in first assignment table (xplor|iupac|none). '. 'Default - xplor. Use "none" to avoid validation of atom names', 'default'=>'xplor' }, -assnom2=>{ 'description'=>'atom name nomenclature in second assignment table (xplor|iupac|none). '. 'Default - xplor. Use "none" to avoid validation of atom names', 'default'=>'xplor' }, -fold=>{ 'description'=>'type of folding of heteronucleus axis (rsh|states-tppi|none). '. 'Default - none', 'default'=>'none' }, -ref=>{ 'description'=>'heteronucleus carrier position, ppm', 'default'=>undef }, -sw=>{ 'description'=>'heteronucleus axis spectrum width, ppm', 'default'=>undef }, -covalent=>{ 'description'=>'flag, calculate covalent noe peaks (protons separated by <4 bonds). '. 'Unused by default.', 'default'=>'false', 'type'=>'flag' }, -axisorder=>{ 'description'=>'order of columns in the output PCK file (e.g. 123, 132, 312), '. 'use if column order expected is different from default', 'default'=>'123' }, -novalidation=>{ 'description'=>'flag, turns off atom name validation in all assignment tables '. 'and pdb file', 'default'=>'false', 'type'=>'flag' } } }, 'fun'=>$toolkit::{calcpck} }, -prtvio=>{ 'synopsis'=>'print summary of violation analysis based on output of prtvio.inp '. 'xplor protocol', 'category'=>'noe', 'help'=>'', 'par'=>{ 'required'=>{ -in=>'prtvio.out file' } }, 'fun'=>$toolkit::{prtvio} }, -calibrate=>{ 'synopsis'=>'Calibrate NOEs from .PCK file '. ', compare NOEs with distances in PDB files and show '. 'abnormally strong signals, '. "or produce XPLOR distance restraint table.\n". "Read GENERAL NOTES in help for more details.", 'category'=>'noe', 'help'=>'GENERAL NOTES. Function will produce one of three possible types of output: '. '1) listing of xplor restraint file (if -xplor option is used),'. ' 2) a list of restraints violating distances found in pdb files '. '(if option -strong is used), 3) a plot of peak intensity vs. '. 'distances in pdb file and the noe calibration curve (explained below) '. '(with option -graph). (Note: '. "options -xplor, -strong, and -graph are mutually exclusive.)\n ". 'Outputs of type 2) and 3) require pdb files so -pdb option must be '. 'used with them. For producing xplor restraint file pdb files are not '. 'required if binned (-option bin) or fixed upper limit (option -uplim) '. 'calibration is used. '."\n\n". 'CALIBRATION DETAILS. Three types of distance restraint calibration are '. 'available: binned (option -bin), continuous (option -cont), '. 'and fixed upper limit (option -uplim). '. 'Binned calibration can be automatic or manual. For automatic '. 'binned calibration: 20% most intense signals are assigned lower and upper '. 'limits of 1.8A and 2.7A (strong), next most intense 30% - 1.8A to 3.3A '. '(intermediate), next 30% strongest - 1.8A to 5A (weak restraints), '. 'and the weakest 20% - 1.8A to 6.0A (very weak). For manual binned calibration '. 'paremeters -w, -m, -s must be used. -w intensity separating weak and very weak signals'. '-m boundary between weak and medium intensity signals, and -s boundary between '. 'medium and strong signals. '. 'Fixed upper limit calibration will set all restraints from 1.8A to specified '. 'upper limit (value of -uplim parameter). Continuous calibration is defined by '. 'equation A*r^(-exp)+B, where A and B are either calculated from given peak '. 'intensity distribution or provided by user as values of parameters -A and -B.'. 'parameter exp is filled with default value (4.8) or provided by user via '. '-exp parameter'."\n". 'ON PCK FILE. Note that PCK file can be produced from aria restraints '. 'by aria_to_PIPP.tcl script. Also note that only peak assignments and '. 'peak intensities are used from the PCK files. Chemical shifts are not used '. "\n\nDISTANCE DETERMINATION. The shortest distance from a set of pdb files will ". 'be chosen for calibration'. "\n\nEXAMPLES:\n". "1) XPLOR, automatic binned calibration: \"$prog -pck -bin -xplor\"\n". "2) XPLOR, manual binned calibration: \"$prog -pck -bin -w 3e5 -m 4e6 -s 5.5e7 -xplor\"\n". "3) XPLOR, fixed upper limit: $prog -calibrate -pck -uplim 5.5 ". "-xplor\n". "4) XPLOR, continuous calib: $prog -calibrate -pck -pdb ". "-nom xplor -cont -A 5e7 -B 1e4 -exp 5 (note: -A,-B and -exp are optional)\n". "5) XPLOR, base calibration on residues 40-54: $prog -calibrate -pck ". "-pdb ". "-nom xplor -cont -range 40-54\n". #"6) XPLOR, use only H (amid) or HA for calibration: $prog -calibrate -pck ". #"-pdb ". #"-nom xplor -cont -atoms 'H|HA\$' (note regular expression syntax here)\n". "6) GRAPHICAL output: switch -xplor to -graph from prevous examples.\n". "7) LIST of abnormally strong signals: switch -xplor to -strong", 'par'=>{ 'required'=>{ -pck=>'one pipp .PCK file with peak assignments' }, 'optional'=>{ '-pdb'=>{ 'description'=>'a list of pdb files for intensity calibration', 'default'=>undef, }, '-nolib'=>{ 'description'=>'do not use residue library for atom validation '. '(use it if your residue library is not standard protein)', 'type'=>'flag', 'default'=>'false' }, '-nom'=>{ 'description'=>'atom naming system in pdb files xplor, iupac, or pdb', 'default'=>undef }, '-range'=>{ 'description'=>'range of residues to use for noe calibration', 'default'=>undef }, '-w'=>{ 'description'=>'for binned calibration (-bin flag option, -m and -s must be'. 'used too). Signal level '. 'on the boundary between very weak and weak signals', 'default'=>undef }, '-m'=>{ 'description'=>'for binned calibration (-bin flag option, -w and -s must be'. 'used too). Signal level '. 'on the boundary between weak and medium signals', 'default'=>undef }, '-s'=>{ 'description'=>'for binned calibration (-bin flag option, -w and -m must be'. 'used too). Signal level '. 'on the boundary between medium and strong signals', 'default'=>undef }, #'-atoms'=>{ # 'description'=>'regular expression to select atoms for calibration', # 'default'=>undef #}, '-uplim'=>{ 'description'=>'distance restraint upper limit for use with fixed '. 'upper limit type of calibration', 'default'=>undef }, '-strong'=>{ 'description'=>'print NOE signals abnormally intense for the '. 'corresponding distance found in pdb files. Estimate of '. 'distance violation is also printed. '. 'this option can only be used together with -cont', 'type'=>'flag', 'default'=>'false' }, -bin=>{ 'description'=>'calibrate noe\'s as strong (1.8-2.7A), medium '. '(1.8-3.3A), weak (1.8-5A), and '. 'very weak (1.8-6.0A)', 'type'=>'flag', 'default'=>'false' }, -cont=>{ 'description'=>'use continuous function A*r^(-exp)+B '. 'for noe calibrations', 'type'=>'flag', 'default'=>'false' }, '-graph'=>{ 'description'=>'show a graph of peak intensity vs. distance in '. 'pdb files and noe calibration curve', 'type'=>'flag', 'default'=>'false' }, '-xplor'=>{ 'description'=>'produce xplor restraint list (with <3 bond NOEs removed)', 'type'=>'flag', 'default'=>'false' }, '-A'=>{ 'description'=>'constant A from noe intensity equation (see help)', 'default'=>undef }, '-B'=>{ 'description'=>'constant B from noe intensity equation (see help)', 'default'=>undef }, '-exp'=>{ 'description'=>'exponent from noe intensity equation, must be entered as '. 'positive number (see help), default 4.8', 'default'=>4.8 }, '-keepcov'=>{ 'description'=>'only applies to -xplor type of output, ' .'keep covalent noe\'s for the output, covalent restraints are ' .'removed by default', 'type'=>'flag', 'default'=>'false' } } }, 'fun'=>$toolkit::{calibrate_noe} }, '-close'=>{ 'synopsis'=>'Calculate close contacts with a particular atom/atom group', 'category'=>'other', 'par'=>{ 'required'=>{ '-pdb'=>'pdb file(s)', '-nom'=>'nomenclature xplor or pdb', '-at'=>'atom or atom group wildcard in '. 'xplor nomenclature, '. 'e.g. P13.HB1 or L13.HD1% or L85.HD#' }, 'optional'=>{ '-cutoff'=>{ 'description'=>'maximum acceptable distance for search of close contacts', 'default'=>5.0 }, '-nstr'=> { 'description'=>'minimal number of structures to satisfy distance cutoff requirement', 'default'=>1 }, '-ave'=>{ 'description'=>'averaging scheme SUM or CEN', 'default'=>'SUM' } } }, 'fun'=>$toolkit::{findclose}, 'help'=>'NOTE: atom names supplied with -at parameter '. 'are expected in XPLOR nomenclature' }, '-dist'=>{ 'synopsis'=>'Calculate distance between two atoms or atom '. 'groups from coordinates in pdb file', 'category'=>'other', 'par'=>{ 'required'=>{ '-pdb'=>'pdb file', '-nom'=>'nomenclature xplor or pdb', '-at'=>'two atoms or atom group wildcards in '. 'xplor nomenclature, '. 'e.g. P13.HB1 or L13.HD1% or L85.HD#' }, 'optional'=>{ '-ave'=>{ 'description'=>'averaging scheme SUM or CEN', 'default'=>'SUM' } } }, 'fun'=>$toolkit::{dist}, 'help'=>'NOTE: atom names supplied with -at parameter '. 'are expected in XPLOR nomenclature' }, '-importseq'=>{ 'synopsis'=>'Add protein sequence to project data file', 'category'=>'DATA', 'par'=>{ 'required'=>{ -dat=>'project data file', -seq=>'protein sequence in fasta format (only aminoacid letters)' } }, 'help'=>'', 'fun'=>$toolkit::{importseq} }, '-dnapipp'=>{ 'synopsis'=>'Produce blank pipp assignment table for dna by sequence', 'category'=>'data_IO', 'par'=>{ 'required'=>{ -seq=>'dna sequence file' }, 'optional'=>{ -atoms=>{ 'description'=>'atoms to be saved in the output, e.g. \'CHN\'', 'default'=>'H' } } }, 'fun'=>$toolkit::{dnapipp}, 'help'=>'dna sequece must be in lowercase letters, e.g. atgc 5\' to 3\' direction' }, '-pdbnm'=>{ 'synopsis'=>'Convert atom names in pdb files between '. 'iupac, xplor, and pdb styles', 'category'=>'data_IO', 'par'=>{ 'required'=>{ -in=>'input pdb file', -out=>'output pdb file', -ifmt=>'input nomenclature (xplor|iupac|pdb)', -ofmt=>'output nomenclature (xplor|iupac|pdb)' }, 'optional'=>{ -model=>{ 'description'=>'model number to extract', 'default'=>undef } } }, 'fun'=>$toolkit::{pdbnm}, 'help'=>'' }, '-hbond'=>{ 'category'=>'other', 'help'=>'IMPORTANT: always check output, this function will always '. 'be wrong for non-backbone H-bonds, so take care to fix them '. 'manually. Here is a short walkthrough: '. 'Open molmol, load pdb. Go through menus: Calc->H-bonds '. 'once you generate output copy-paste ONLY lines containing '. 'calculated hydrogen bond information into a file named say hbond.txt, '. 'i.e. do not copy any header lines. '. 'Then run this program with parameters: -hbond -in hbond.txt > file. '. 'Check output and fix non-backbone hydrogen bond manually.', 'par'=>{ 'required'=>{ -in=>'file with hydrogen bond info lines generated by molmol' } }, 'fun'=>$toolkit::{hbond}, 'synopsis'=>'Get XPLOR H-bond restraint table from MOLMOL H-bond '. 'calculation output (IMPORTANT - check help)', }, '-assfmt'=>{ 'synopsis'=>'Translate assignment table and atom name conventions', 'category'=>'data_IO', 'par'=>{ 'required'=>{ -in=>'input assignment table', -out=>'output assignment table', -ifmt=>'input file format (sparky|pipp|xeasy|cara|cppn)', }, 'optional'=>{ -inom=>{ 'description'=>'input nomenclature (xplor|iupac|cara), mandatory if -ifmt is not cara', 'default'=>undef }, -ofmt=>{ 'description'=>'output file format (sparky|pipp)', 'default'=>undef }, -iseq=>{ 'description'=>'aminoacid sequence, used only if -ifmt xeasy or cara', 'default'=>undef }, -onom=>{ 'description'=>'output nomenclature (xplor|iupac)', 'default'=>undef }, -reset=>{ 'description'=>'reset residue numbering to start from 1', 'default'=>'false', 'type'=>'flag' } } }, 'fun'=>$toolkit::{assfmt}, 'help'=>'' }, '-foldass'=>{ 'synopsis'=>'fold assignment table to desired SW and save result in a file', 'category'=>'spec_analysis', 'par'=>{ 'required'=>{ -in=>'input assignment table', -out=>'output assignment table with folded frequencies', -fmt=>'assignment table format - sparky or pipp', -sw=>'desired spectral width', -nom=>'atom nomenclature in the assignment table', '-ref'=>'desired spectral window center', -atoms=>'quoted (use single quotes, \') regular expression '. 'matching atom types whose frequencies '. 'are to be folded' }, 'optional'=>{ -ofmt=>{ 'description'=>'format of output assignment table', 'default'=>undef }, -onom=>{ 'description'=>'atom nomenclature for output', 'default'=>undef } } }, 'help'=>'folding will be applied only to atoms matching the expression '. 'provided with -atoms parameter. If you want to fold C axis atoms in '. '13C edited NOESY spectrum use \'C[A-Z]\' as expression, it will not '. 'match C (carbonyl carbon atoms in IUPAC nomenclature). If -ofmt is '. 'given, resulting assignment table will be saved in the specified '. 'format', 'fun'=>$toolkit::{foldass} }, '-trest'=>{ 'category'=>'noe', 'synopsis'=>'test noe restraint given in xplor format (see help for more info)', 'par'=>{ 'required'=>{ '-specno'=>'aria assigned numbers to spectra, e.g. cnoe=1 nnoe=2', '-in'=>'data project file in internal format with noe spectra and '. 'assignment table' }, 'optional'=> { '-pdb'=>{ 'description'=>'pdb file', 'default'=>undef }, '-nom'=>{ 'description'=>'atom nomenclature in pdb file', 'default'=>undef }, -cutoff=> { 'description'=>'noe distance cutoff', 'default'=>undef } } }, 'fun'=>$toolkit::{trest}, 'help'=>'presence of symmetry peak, distance in pdb file if one is given, and '. 'alternative assignments will be reported' }, '-psymmnoe'=>{ 'category'=>'noe', 'synopsis'=>'write symmetry related noe peaks as aria '. 'distance restraint file and save remaining peaks '. 'as unassigned peak lists (named after corresponding '. 'spectra with .list extension) in sparky format, while '. 'ignoring peaks not matching the assignment table '. 'and/or experiment type (read help for more info)', 'par'=>{ 'required'=>{ -in=>'input dataset file. The data must be '. 'previously searched for symmetry peaks '. 'and filtered against the assignment table', -orest=>'output aria restraint file name' }, 'optional'=>{ -pdb=>{ 'description'=>'pdb file to filter out '. 'noe\'s inconsistent with molecular '. 'geometry', 'default'=>undef }, -base=>{ 'description'=>'name of the spectrum to '. 'use as the '. 'source of peak intensities. '. 'This parameter is required if none '. 'of the spectra are calibrated in '. 'at least one symmetry pair', 'default'=>undef }, -chir=>{ 'description'=>'scramble prochiral and '. 'other equivalent atoms', 'default'=>'false' }, -cal=>{ 'description'=>'calibration constants per spectrum basis '. 'format example cnoe=2.5e-4 nnoe=3.54e-3', 'default'=>undef } } }, 'fun'=>$toolkit::{psymmnoe}, 'help'=>'If noe calibration constant is provided externally '. 'or was stored in the dataset previously '. 'for at least one peak within a given symmetry pair, '. 'corresponding restraint '. 'will be stored as calibrated aria restraint. '. 'Otherwise, it will '. 'output as uncalibrated restraint and peak intensity will be '. 'used from the spectrum whose name is taken from value '. 'provided with parameter -base '. 'When both peaks in the symmetry pair come from the same '. 'spectrum, smaller value of volume or larger value of '. 'distance will be used for output (to get a conservative '. 'estimate of distance restraint). Also, largest value of '. 'distance will be used for output when two peaks '. 'within a symmetry pair come from different spectra '. 'and both of them have noe intensities calibrated to '. 'distances' }, '-p2s'=>{ 'category'=>'data_IO', 'synopsis'=>'translate ASSIGNMENT TABLE from pipp to sparky format '. '(CAVEAT: see -help)', 'fun'=>$toolkit::{asspipp2sparky}, 'par'=>{ 'required'=>{ -in=>'input assignment table file in pipp format', -out=>'sparky format output file' } }, 'help'=>'CAVEAT: current implementation does not translate '. 'atom names between different nomenclatures' }, '-ass2tbl'=>{ 'synopsis'=>'produce readable printout of assignment table entries', 'category'=>'data_IO', 'help'=>'', 'fun'=>$toolkit::{ass2tbl}, 'par'=>{ 'required'=>{ -in=>'input assignment table', -fmt=>'format of assignment table', -nom=>'atom name nomenclature', -atoms=>'list of atom names in IUPAC format stripped off '. 'tha atom number, e.g. -atoms HA HB C CB. '. '(leave out atom # i.e. use HB, not HB2)' } } }, '-as2p'=>{ 'category'=>'data_IO', 'help'=>'NOTE: (-chir option) those assignments in the input '. 'file that '. 'already have been entered as scrambled will not be '. 'changed. Format for scrambled assignments is one of the '. 'following two: '. '1) xplor HB2#, HB% style or 2) regex style HB[23] where '. 'regular expression syntax is limited to use of [] brackets. '. 'For example regular expression HD[12][123] will translate to: '. 'HD11|HD12|HD13|HD21|HD22|HD23, and H[BG][23] to HB2|HB3|HG2|HG3', 'fun'=>$toolkit::{asssparky2pipp}, 'par'=>{ 'required'=>{ -in=>'input sparky file', -out=>'output pipp file', -nom=>'atom name nomenclature in input file '. 'iupac or xplor' }, 'optional'=>{ -onom=>{ 'default'=>undef, 'description'=>'desired atom name '. 'nomenclature in output pipp file' }, -chir=>{ 'default'=>'false', 'type'=>'flag', 'description'=>'scramble prochiral and equivalent '. 'atom assignments, i.e. ALA.HB3 -> ALA.HB3|HB2 '. '(see NOTE)' } } }, 'synopsis'=>'translate sparky assignment table to pipp format, '. 'optionally translating atom name nomenclature between '. 'xplor and iupac, and also optionally scrambling prochiral and '. 'aquivalent atom assignment' }, '-s2p'=>{ 'category'=>'data_IO', 'synopsis'=>'translate PEAK TABLE from sparky .save to pipp .PCK format '. '(CAVEAT: see -help)', 'fun'=>$toolkit::{pksparky2pipp}, 'par'=>{ 'required'=>{ -in=>'input sparky .save file', -out=>'pipp .PCK output file', }, 'optional'=>{ -axes=>{ 'default'=>undef, 'description'=>'set correspondence of sparky dimenstions (w1..w4) to pipp (x,y,z,a)'. "e.g.: >$prog ... -axes x=w2 y=w1 z=w3 a=w4" }, -label=>{ 'default'=>undef, 'description'=>'choose dimensions for assignment label label in output'. ": >$prog ... -label w1 w2" } } }, 'help'=>'WALKTHROUGH: 1) open your data in pipp and close it, at this '. 'point you should obtain empty .PCK file 2) run this conversion script '. '3) take header from .PCK file that you got in step 1 and put it on top '. 'of file that was generated in step 2. That should be it..'. 'FURTHER DETAILS: To determine how axes in pipp X, Y, etc correspond to '. 'axes in sparky w1, w2 etc., '. 'open data in both programs and look at shifts of some '. 'well placed peak. Also you may figure it out just by trial and error '. 'by trying to open pipp data with new .PCK file. '. 'NOTE: remember to use \'setenv PK_TABLE_FORMAT 1\' in pipp startup '. 'script. '. 'CAVEAT: this implementation does not write out assignments '. 'but but only saves unassigned peaks picked in SPARKY '. 'for later display in PIPP' }, '-extract'=>{ 'synopsis'=>'extract peaklist from a dataset and save it '. 'in a separate file', 'category'=>'internal', 'par'=>{}, 'fun'=>$toolkit::{extractExp}, 'help'=>'' }, '-delpar'=>{ 'synopsis'=>'delete parameter from a peaklist', 'category'=>'internal', 'par'=>{}, 'fun'=>$toolkit::{delExpPar}, 'help'=>'' }, '-setpar'=>{ 'synopsis'=>'set parameter to a peaklist in imported data', 'category'=>'internal', 'par'=>{ 'required'=>{ -par=>'parameter name', -spectrum=>'spectrum name', -val=>'parameter value', -file=>'data file name' } }, 'fun'=>$toolkit::{setExpPar}, 'help'=>'' }, '-mvexp'=>{ 'synopsis'=>'rename peaklist in the imported data file', 'category'=>'DATA', 'par'=>{ 'required'=>{ -to=>'new name', -from=>'old name', -file=>'data file' } }, 'fun'=>$toolkit::{mvexp}, 'help'=>'' }, '-addexp'=>{ 'synopsis'=>'add another peak list to an existing one', 'category'=>'internal', 'par'=>{}, 'fun'=>$toolkit::{delExp}, 'help'=>'' }, '-delexp'=>{ 'synopsis'=>'remove dataset from imported peaklist', 'category'=>'internal', 'par'=>{ 'required'=>{ -dat=>'data file', -spectrum=>'spectrum name' } }, 'fun'=>$toolkit::{delExp}, 'help'=>'' }, '-inspect'=>{ 'synopsis'=>'print summary of imported peaklists and assignments', 'category'=>'DATA', 'par'=>{'required'=>{ -file=>'data file name' }, 'optional'=>{ -spectrum=>{ 'description'=>'name of spectrum', 'default'=>undef } } }, 'fun'=>$toolkit::{inspect}, 'help'=>'if -spectrum parameter is used, all parameters and details to ', 'that particular spectrum will be printed' }, '-setaxis'=>{ 'help'=>'', 'category'=>'DATA', 'fun'=>$toolkit::{setaxis}, 'par'=>{ 'required'=> { -file=>'data file name', -spectrum=>'spectrum name', -axis=>'axis name (X,Y,Z, or A)' }, 'optional'=> { '-ref'=>{ 'description'=>'shift offset for 0 Hz frequency', 'default'=>undef }, -unfold=>{ 'type'=>'flag', 'default'=>'false', 'description'=>'flag that turns off axis folding' }, -fold=>{ 'type'=>'flag', 'default'=>'false', 'description'=>'flag that turns on axis folding for '. 'analysis' }, -sw=>{ 'description'=>'spectral width in ppm', 'default'=>undef }, -label=>{ 'description'=>'text label (important for noe)', 'default'=>undef }, -tol=>{ 'description'=>'chemical shift tolerance for peak analysis', 'default'=>undef }, -lim=>{ 'description'=>'lower and upper limits of chemical '. 'shift (two numbers)', 'default'=>undef } } }, 'synopsis'=>'mark axes in spectral data with labels and other '. 'parameters so that other analysis routines could work' }, '-noefilt'=> { 'synopsis'=>'read noe list, apply a choice of filter and save '. 'the resulting noe list', 'futurecategory'=>'noe', 'category'=>'internal', 'fun'=>$toolkit::{noefilt}, 'futurecategory'=>'noe', 'par'=>{ 'required'=>{ -in=>'input noe list (in internal format, see -importpeaks)', -out=>'output noe list file (internal format)', -method=>'choice of noe filtering method (see more help). '. 'Options: ass, integrated, symm, nosymm, width, dist,'. ' mergesymm.' }, 'optional'=>{ -cutoff=> { 'description'=>'maximum distance assignable to noe contact, A', 'default'=>5.0 }, -ass=>{ 'description'=>'assignment table in sparky format, required for '. '\'ass\' method', 'default'=>undef }, -width=>{ 'description'=>'Used with \'width\' method. '. 'Only one file at a time allowed (with -in option). '. 'format ax1=: ax2=: ... axN=:, '. 'where and etc. are lower and upper limits of '. 'acceptable peak width for each axis. Axis order is the same as in '. 'the source peak table (as imported from external software).', 'default'=>undef }, -pdb=>{ 'description'=>'Used with \'dist\' method. List of pdb structure files. '. 'Minimum distances will be selected from all files for noe filtering.', 'default'=>undef }, -nom=>{ 'description'=>'May be used in addition to -pdb option to specify '. 'atom name nomenclature in the pdb files. Options: xplor, pdb, iupac', 'default'=>'xplor' }, -dst=>{ 'type'=>'flag', 'default'=>'false', 'description'=>'use if diastereotopic atoms are ass unambiguously' } } }, 'help'=>'Method \'ass\' will filter assignable peaks based on '. 'assignment table, type of noe spectrum and aminoacid connectivity. '. 'Method also requires symmetrization filtering applied previously with \'symm\' method' }, '-importass'=> { 'category'=>'DATA', 'synopsis'=>'Import assignments from sparky or pipp projects and '. 'add them to the data in the internal format', 'par'=>{ 'optional'=>{ -seq=>{ 'description'=>'protein sequence file', 'default'=>undef } }, 'required'=>{ -dat=>'name of data file in the internal format', -ass=>'name of pipp or sparky assignment table file', -fmt=>'format of assignment table pipp or sparky', -nom=>'atom naming system (iupac or xplor)' } }, 'fun'=>$toolkit::{importAss}, 'help'=>'1. If an optional parameter -seq is used, only assignments that are '. 'in agreement with the peptide sequence will be imported'."\n". '2. NB: With SPARKY use limited regular expressions '. 'to label atoms (with only character classes allowed, e.g. H[GD][12] that is '. 'is means "HG1 or HG2 or HD1 or HD2").' }, '-exportass'=> { 'category'=>'internal', 'futurecategory'=>'data_IO', 'synopsis'=>'Open resonance list file in the internal format and save '. 'it in sparky or pipp format', 'par'=>{ 'optional'=>{ -seq=>{ 'description'=>'name of the peptide sequence file', 'default'=>undef }, -format=>{ 'description'=>'format of output file(s) sparky or pipp', 'default'=>'sparky' }, -nom=>{ 'description'=>'atom name nomenclature system '. ' (recognized options are iupac, pdb, xplor', 'default'=>'xplor' } }, 'required'=>{ -out=>'name of output file', -in=>'name of input file' } }, 'fun'=>$toolkit::{exportAss}, 'help'=>'1. If an optional parameter -seq is used, only assignments that are '. 'in agreement with the peptide sequence will be imported'."\n". '2. NB: With SPARKY use limited regular expressions '. 'to label atoms (with only character classes allowed, e.g. H[GD][12] that is '. 'is means "HG1 or HG2 or HD1 or HD2").' }, '-mergepeaks'=> { 'category'=>'internal', 'futurecategory'=>'spec_analysis', 'synopsis'=>'Refine peak coordinates in a given list by using the best '. 'coordinates of two peak lists', 'par'=>{ 'required'=> { -list1=>'the peak list (in internal format, see -importpeaks) '. 'subject to coordinate refinement', -list2=>'second peak list with more precise peak positions', -out=>'name of refined output peak list file', -axes=>'axes in the \'list1\' file (e.g. \'1 3 4\') to be '. 'matched against \'list2\' file ' }, 'optional'=> { -axes2=>{ 'description'=>'axes for the \'list2\' file '. 'given in the order corresponding to '. 'value of -axis. If not provided, program will '. 'try to match axes in two spectra automatically', 'default'=>undef } } }, 'help'=>'This function is intended for use when there are two spectra '. 'where one is better resolved but has wider signals then the other '. 'and it is desirable to refine coordinates of more resolved spectrum '. '(because its signals are nevertheless wider). '. 'Two such spectra may be 4D and 3D with 4D spectrum having broader '. 'signals due to limited sampling but excellent resolution due to '. 'higher dimensionality. AXES: the -axes parameter required by '. 'this function refers to the \'list1\' spectrum. The program will '. 'try to automatically match them to axes in file \'list2\' unless '. 'parameter -axes2 is provided.', 'fun'=>$toolkit::{mergeRefinePeakCoor} }, -showdup=> { 'category'=>'spec_analysis', 'synopsis'=>'find duplicate peaks wihin tolerance and print the result. ', 'par'=>{ 'required'=> { -in=>'input file', # -out=>'output file', -tol=>'chemical shift tolerances. To be used with '. '\'isolated\' method (see more help).' }, 'hardcoded'=>{ -method=>'showdup' } }, 'fun'=>$toolkit::{filtPeaks}, 'help'=>'probably user would want to find the duplicate peaks '. 'then remove them with the favorite data analysis program '. 'and import the improved data again' }, '-isolatedpeaks'=> { #todo convert this function to -peakfilt with methods #'category'=>'internal', 'category'=>'spec_analysis', 'synopsis'=>'Read imported peak list (see -importpeaks), select '. 'well resolved signals and save them into a file', 'par'=>{ 'required'=> { -in=>'input file', -out=>'output file', -tol=>'chemical shift tolerances. To be used with '. '\'isolated\' method (see more help).' }, 'optional'=> { -showdup=>{ 'description'=>'shows duplicate peaks (within -tol). '. 'NOTE: this option completely changes function '. 'behavior. Unresolved peaks are only reported', 'default'=>'false', 'type'=>'flag' } }, 'hardcoded'=>{-method=>'isolated'} }, 'help'=>'-tol argument should be in one of the two formats:'. 'something like N=0.1 C=0.15 H=0.04 OR ax1=0.1 ax2=0.15 .., '. 'first format can be used if file already contains some assignments '. 'second can only be used for the file that has no assignments or '. 'if you dont want to use those assignments to guess what nuclei '. 'do the axes correspond to. In addition, second format may only be used '. 'for one file at a time. '. 'N C H are matching expressions for the '. 'nuclei, numbers - chemical '. 'shift tolerance. Matching expressions: nucleus type will be '. 'matched against '. 'atom type in the assignment (i.e. sparky or pipp project). '. 'For example, to match CO only use CO as the matching '. 'expression, but if you want to' . 'select all carbon atoms use - C (and it will match CA, CB, CO etc.). '. 'If you want to discriminate CO from others use CO for CO '. 'and \'C[ABCD]\' for others. Note that expression with special '. 'symbols must be quoted to avoid shell expansion '. '(Actually you can specify a Perl-style '. 'regular expression for precise matching)', 'fun'=>$toolkit::{filtPeaks} }, '-importpeaks'=> { 'category'=>'DATA', 'synopsis'=>"Read pipp .PCK file or sparky .save and add it to the data project ". "in the internal format (CAVEAT: read help).", 'par'=>{ 'required'=> { -peaks=>'input peak list file', -dat=>'project data file', -fmt=>'peak list format (pipp|sparky)' # -nom=>'atom naming system (xplor|iupac)' } }, 'fun'=>$toolkit::{importpeaks}, 'help'=>'CAVEATS: peak assignments are not read in for now, and axis order is assumed correct '. 'i.e. please make sure that columns in PCK file or axis order (w1..w3) in .save '. 'file is what is expected. For 3D NOESY data (nnoe and cnoe) '. 'Directly detected proton is in first dimension, heteroatom '. 'in second dimension and indirect proton in third. For other data please open '. 'source code of this program and read subroutine '. 'internal::spectrum::defaults::axis' }, '-cal'=> { 'category'=>'data_proc', 'fun'=>$toolkit::{calibrate_shifts}, 'par'=>{ required=> { '-ref'=>'current center of the spectrum', '-shifts'=>"Two-column calibration file. ". "First column should have current (uncalibrated) shifts, ". "and second column - ideal shifts from the reference spectrum. ". "Lines that start with # are ignored." }, optional=> { '-sw'=>{ 'description'=>'Current spectral width. If it is '. 'defined, sw will also be adjusted by fitting', 'default'=>undef } } }, 'synopsis'=>"Calibrate chemical shift offset for one spectral axis ". " and (optionally) spectral width, using linear regression." }, '-par'=> { 'futurecategory'=>'data_proc', 'category'=>'internal', 'fun'=>$toolkit::{getacqpar}, 'help'=>'parameter names are case-insensitive, elements of '. 'array parameters are retrieved by appending a number '. 'e.g. p1. If number is not appended, all array values will be '. 'printed', 'par'=>{ default=>{ description=>'parameter name', alias=>'name' } }, 'synopsis'=>"Retreive bruker parameter by its name ". "(experiment must be in current directory)" }, '-checkass'=> { 'category'=>'internal', 'futurecategory'=>'spec_analysis', 'fun'=>$toolkit::validate::{ass}, 'synopsis'=>"Perform sanity check on assignments made in Sparky v3.1 project in " ."current directory" }, '-findres'=> { 'category'=>'spec_analysis', 'fun'=>$toolkit::{findres}, 'par'=>{ required=>{ -shifts=>'assignment table', -fmt=>'assignment table format (sparky|pipp)', -nom=>'atom name nomenclature (iupac|xplor)', -res=>'Approximate resonance frequency (must be within '. 'tolerance -tol to match)', }, optional=>{ -tol=>{description=>'Resonance frequency search tolerance', default=>0.03} } }, 'synopsis'=>"Find resonances within a certain interval of chemical shift in sparky". "assignment table" }, '-searchp'=> { 'category'=>'internal', 'futurecategory'=>'spec_analysis', 'fun'=>$::{searchp}, 'synopsis'=>"Find peaks containing a resonance at certain chemical shift" }, '-reg'=> { 'futurecategory'=>'spec_analysis', 'category'=>'internal', 'fun'=>$::{preg}, 'synopsis'=>"Report aliases of unassigned regions" }, '-idexp'=> { 'category'=>'data_proc', 'fun'=>$toolkit::{idexp}, 'par'=>{ optional=> { -title=>{ description=>'print title for experiment (overriding automatic title)', default=>undef }, -dir=>{ description=>'experiment directory', default=>'.' }, -comment=>{ description=>'user comment', default=>undef } #'-brief'=>{ # description=>'if selected, one line summary will be printed', # type=>'flag' #} }, 'hardcoded'=>{'-val'=>1} }, 'synopsis'=>"Print details of nmr experiment in current directory", }, '-ppsetup'=> { 'category'=>'internal', 'futurecategory'=>'exp_setup', 'fun'=>$toolkit::{idexp}, 'synopsis'=>"Read pulse program and tell which parameters need to be set" }, '-ppstat'=> { 'category'=>'exp_setup', 'futurecategory'=>'exp_setup', 'par'=>{ 'optional'=>{ -users=>{ 'description'=>'user name(s)', 'default'=>undef }, -pp=>{ 'description'=>'part(s) of pulse program name(s)', 'default'=>undef }, -list=>{ 'description'=>'print only list of pulseprograms', 'default'=>'false', 'type'=>'flag' }, } }, 'fun'=>$toolkit::{searchpp}, 'synopsis'=>"Print statistics of pulse program usage and paths to datasets" }, '-conv'=> { 'category'=>'data_proc', 'fun'=>$toolkit::{conv}, 'par'=> { 'optional'=> { -dir=>{ description=>'directory where acqu*s files are', default=>'./' }, -axisorder=>{ description=>"parameter consisting of two or ". "three digits for 2D and 3D data, respectiely.". " For example 132 would switch axes Y and Z, ". "etc.", default=>undef }, #-mhz=>{ # 'description'=>'spectrometer frequency in Mhz e.g. 600 or 800', # 'default'=>undef # }, #'-ref'=>{ # 'description'=>'ppm value for chemical shift at zero larmor frequency', # 'default'=>undef #}, #'-noref'=>{ #'description'=>'use this flag to turn off automatic referencing '. #'for spectrum dimensions', #'default'=>'false', #'type'=>'flag' #} } }, 'synopsis'=>"Generate nmrPipe conversion script for 2D and 3D Bruker data", 'help'=>'NOTE: you will still have to find right calibration for chemical shifts' #'help'=>'parameters -ref and -noref cannot be used together' } #'-genMARS'=> #{ # 'category'=>'data_IO', # 'fun'=>$toolkit::{genmars}, #'par'=>{ # 'required'=>{ # '-seq'=>'protein sequence', # '-fmt'=>'format of peak table (sparky|pipp)', # '-pck'=>'list of peak files (.save for sparky, .pck for pipp)' # }, # 'optional'=>{ # '-exclude'=>{ # 'description'=>'list of atoms to exclude from MARS input '. # '(restricted to: CA, CB, CO, HA, H, N)', # 'default'=>[] #} #} #}, # 'synopsis'=>"Setup input for MARS calculation (automatic protein backbone ". #"assignment) based on pipp .pck or sparky .save files" #} ); my $toolkit = toolkit->run(\@ARGV); exit; package toolkit; sub toolkit::debug { my %nom = bio::protein::aminoacid::atom::nomenclature(); util::readfile(glob('~/.tcshrc')); app::error("just crashlanded"); foreach my $aa (keys %nom) { my $atoms = $nom{$aa}; foreach my $atom (@$atoms) { $atom->{cara} = $atom->{iupac}; } } util::print(\%nom); } sub toolkit::junk { my $self = shift; my $arg = $self->{argv}; my $ifmt = $arg->{-p1}; my $ofmt = $arg->{-p2}; my $in = $arg->{-default}; my $ass = sparky::readass($in); foreach my $a (@$ass) { if ($a->{atom} =~ /^C/) { if ($a->{atom} eq 'C') { $a->{'shift'} += 2.76; } else { $a->{'shift'} += 2.33; } } elsif ($a->{atom} =~ /^N/) { $a->{'shift'} += 0.46; } } foreach my $a (@$ass) { $a->{atom} = bio::protein::aminoacid::atom::translate( -from=>'iupac', -to=>'iupac', -atom=>$a->{atom}, -aminoacid=>$a->{residue} ); } pipp::saveass($ass,'ASS.NEW'); } sub dist2xplor { my $self = shift; my $file = $self->{argv}{-in}; my @lines = util::readfile($file); my $done = 0; my $count = 0; while (!$done) { my $line = shift @lines; if ($line =~ /^\s*$/) { $count++; } if ($count == 2) { $done = 1; } } foreach my $line (@lines) { $line =~ s/^\s+//; my @bits = split /\s+/, $line; my $no1 = $bits[0]; my $nm1 = $bits[2]; my $no2 = $bits[3]; my $nm2 = $bits[5]; my $dist = $bits[7]; $dist =~ /$($NUMPATTERN)\.\.($NUMPATTERN)$/; my $uplim = $2; my $lolim = $1; my $dminus = sprintf("%.2f",$uplim - $lolim); print "assign (resid $no1 and name $nm1) (resid $no2 and name $nm2) $uplim $dminus 0.0\n"; } } sub pt2spec { #todo this function is not finished #there is an assumption that whole spectrum is given to calculate ppm/point #that is spectral regions will not be processed correctly unless -sw is given #for that spectral region #also output in Hz is not supported my $self = shift; my $arg = $self->{argv}; my $in = $arg->{-in}; my $obs = $arg->{-obs}; my $sw = $arg->{-sw}; my $first = $arg->{-first}; my $cen = $arg->{-cen}; my @lines = util::readfile($in); my @points; foreach my $line (@lines) { $line =~ /^$NUMPATTERN\s+($NUMPATTERN)$/; push @points, $1; } my $ppm_per_point = $sw/scalar(@points); if ($sw =~ /^($NUMPATTERN)\hz$/i) { my $swh = $1; if (not defined $obs) { app::error('-obs must be used if -sw is given in hz'); } $sw = $swh/$obs; } elsif ($sw =~ /^($NUMPATTERN)\ppm$/i) { $sw = $1; } else { app::error("-sw must have either Hz or ppm postfix (e.g. 10ppm or 153.4Hz)"); } if (not defined $first and not defined $cen) { app::error("either -cen or -first parameters must be used"); } else { if (defined $first) { if ($first =~ /^($NUMPATTERN)\ppm$/i) { $first = $1; } else { app::error("-first must have either ppm postfix (e.g. 10ppm)"); } } if (defined $cen) { print $cen,"\n"; if ($cen =~ /^($NUMPATTERN)\ppm$/i) { $first = $1 - $ppm_per_point * (scalar(@points)/2); } else { app::error("-cen must have either ppm postfix (e.g. 10ppm)"); } } } my $ppm = $first; foreach my $point (@points) { print "$ppm $point\n"; $ppm += $ppm_per_point; } } sub txt2rdc { my $self = shift; my $arg = $self->{argv}; my $names = util::array::insure($arg->{-names}); my $file = $arg->{-in}; my $offset = $arg->{-offset}; my $neg = $arg->{-neg}; my $class = $arg->{-class}; my $err = $arg->{'-err'}; my $scale = $arg->{'-scale'}; my $pales = $arg->{'-pales'}; my $seq = $arg->{'-seq'}; my @known_rdcs = ('CACO','NCO','HACA'); my %rdc_nuclei = ( 'CACO'=>['13C','13C'], 'NCO'=>['15N','13C'], 'HACA'=>['1H','13C'], 'HN'=>['1H','15N']); my %rdc_dist = ( 'CACO'=>1.525, 'NCO'=>1.341, 'HACA'=>1.08, 'HN'=>1.02); if (scalar(@$names) != 2) { app::error("two XPLOR atom names must be provided with -names parameter"); } if (util::array::isa($file)) { app::error("only one file at a time can be processed"); } if ($offset !~ /^[+-]?\d+$/) { app::error("number expected with the -offset parameter"); } if (util::array::isa($class) or length($class) > 4) { app::error("-class must be one word of no more the four characters"); } if (not util::scalar::isnum($err)) { app::error("a number is expected as a value or -err parameter"); } if (defined $scale and not util::array::isin($scale,\@known_rdcs)) { app::error("value of -scale parameter may only be one of ".join(',',@known_rdcs)); } my @lines = util::readfile($file); @lines = grep !/^!/, @lines; my %rdc; foreach my $line (@lines) { chomp $line; next if $line =~ /^#/; $line =~ s/\s+$//; if ($line =~ /^(\d+)\s+($NUMPATTERN)\s+($NUMPATTERN)$/) { my $res = $1; my $isotropic_coupling = $2; my $aligned_coupling = $3; my $rdc = $aligned_coupling - $isotropic_coupling; my $res_no = $res + $offset; if ($res_no < 1) { app::error("getting negative residue number for input line\n$line\n". "with offset of $offset"); } $rdc{$res_no} = $rdc; } else { app::error("could not understand line:\n$line\npositive integer". " followed by two numbers expected"); } } if ($neg eq 'true') { foreach my $resno (keys %rdc) { $rdc{$resno} *= -1; } } if (defined $scale) { my $nuc = $rdc_nuclei{$scale}; my $dist = $rdc_dist{$scale}; my $nuc_target = $rdc_nuclei{'HN'}; my $dist_target = $rdc_dist{'HN'}; my $dist_target3 = $dist_target*$dist_target*$dist_target; my $dist_3 = $dist*$dist*$dist; my $factor = (nmr::gamma($nuc_target->[0])*nmr::gamma($nuc_target->[1])*$dist_3)/ (nmr::gamma($nuc->[0])*nmr::gamma($nuc->[1])*$dist_target3); foreach my $resno (keys %rdc) { printf "%-4d%5.2f\n",$resno, $rdc{$resno}*$factor; } } elsif ($pales eq 'true') #pales output { if (not defined $seq) { app::error("-seq must be used for PALES output"); } my @seq = bio::protein::sequence::read($seq); @seq = bio::protein::sequence::format(-seq=>\@seq,-style=>'three_letter'); print "VARS RESID_I RESNAME_I ATOMNAME_I RESID_J RESNAME_J ATOMNAME_J D DD W\n"; print "FORMAT %5d %6s %6s %5d %6s %6s %9.3f %9.3f %.2f\n"; my $fmt = "%5d%7s%7s%6d%7s%7s%11.3f%11.3f%6.2f\n"; my $atom1 = $names->[0]; my $atom2 = $names->[1]; my @res = sort {$a<=>$b} keys %rdc; foreach my $resno (@res) { my $aa = $seq[$resno]; my $rdc = $rdc{$resno}; printf $fmt,$resno,$aa,$atom1,$resno,$aa,$atom2,$rdc,$err,1.0; } } #xplor RDC output else { my $atom1 = $names->[0]; my $atom2 = $names->[1]; my @res = sort {$a<=>$b} keys %rdc; foreach my $resno (@res) { my $rdc = $rdc{$resno}; print "assign ( resid 500 and name OO )\n"; print " ( resid 500 and name Z )\n"; print " ( resid 500 and name X )\n"; print " ( resid 500 and name Y )\n"; printf " ( resid %3d and name $atom1 )\n", $resno; printf " ( resid %3d and name $atom2 ) $rdc %4.2f\n", $resno,$err; } } } sub noe2txt { my $self = shift; my $seq = $self->{argv}{-seq}; my $tbl = util::array::insure($self->{argv}{-tbl}); my $restraints = xplor::noe::read($tbl); my @seq = bio::protein::sequence::read($seq); foreach my $restraint (@$restraints) { print xplor::noe::text($restraint,\@seq); } } sub calc_restraint_report{ my $self = shift; my $noe_files = util::array::insure($self->{argv}{-noe}); my $hnha_files = util::array::insure($self->{argv}{-hnha}); my $noe = xplor::noe::read($noe_files); my $hnha = xplor::hnha::read($hnha_files); my @seq = bio::protein::sequence::read($self->{argv}{-seq}); my $ass = nmr::readass($self->{argv}{-ass},$self->{argv}{-assfmt},$self->{argv}{-assnom},\@seq); #dist dminus dplus selections my $plot = new reports::protein::restraints('sequence'=>\@seq, 'noe'=>$noe, 'hnha'=>$hnha, 'ass'=>$ass); $plot->stdout(); } sub count_noe_restraints { my $self = shift; my $seq = $self->{argv}{-seq}; my $tbl = util::array::insure($self->{argv}{-tbl}); my $dir = $self->{argv}{-dir}; my $restraints = xplor::noe::read($tbl); my $sequence = bio::protein::sequence::read($seq); my ($short,$intra,$sequential,$medium,$long,$combined,$simple) = (0,0,0,0,0,0,0); my ($ambiguous_residue) = (0); my @ambiguous_residue; my @long; my @intra; my @medium; my @sequential; my @short; foreach my $rest (@$restraints) { my $sel = $rest->{selections}; if (scalar(@$sel)>1) { $combined++; next; } $simple++; my $pair = $sel->[0]; #if there are two or more different residues involved in one selection #then count them as 'ambiguous residue assignment' #if one selection has two different atom types count as ambiguous atom assignment my $ambres = 0; foreach my $group (@$pair) { my %resid; my %name; foreach my $atom (@$group) { $resid{$atom->[0]} = 1; $atom; #????? } $ambres = 1 if (scalar (keys %resid) > 1); } if ($ambres == 1) { $ambiguous_residue++; push @ambiguous_residue, $rest; next; } #if there are two or more different atom types from the same residue involved #do this one later my $gr1 = $pair->[0]; my $gr2 = $pair->[1]; my $i = $gr1->[0][0]; my $j = $gr2->[0][0]; my $delta = abs($i-$j); #count short (less then 3 bonds, intraresidue, etc) #count sequential #count medium range (|i-j|<=5) #count long range (|i-j|>5 if ($delta == 0) { my @at1 = map {$_->[1]} @$gr1; my @at2 = map {$_->[1]} @$gr2; #short is when both groups of protons connect to the same heteroatom my $prefix1 = util::array::common_prefix(\@at1); my $prefix2 = util::array::common_prefix(\@at2); my $diff1 = abs(length($prefix1)-length($at1[0])); my $diff2 = abs(length($prefix2)-length($at2[0])); if ($diff1 > 1) { #ambigatom leave for later } elsif ($diff1 == 0) { $prefix1 =~ s/\d$//; } if ($diff2 > 1) { #ambigatom leave for later } elsif ($diff2 == 0) { $prefix2 =~ s/\d$//; } if ($prefix1 eq $prefix2) { $short++; push @short, $rest; } else { $intra++; push @intra, $rest; } } elsif ($delta == 1) { $sequential++; push @sequential, $rest; } elsif ($delta > 4) { $long++; push @long, $rest } else { $medium++; push @medium, $rest; } } my @num = ($long,$medium,$sequential,$intra,$short,$ambiguous_residue); my @lbl = qw (long medium sequential intra short); push @lbl, 'ambiguous residue assignment'; my @restraints = (\@long,\@medium,\@sequential,\@intra,\@short,\@ambiguous_residue); for (my $i=0;$i{argv}; my $seq = $arg->{-seq};#protein sequence file my $tbl = util::array::insure($arg->{-tbl});#list of xplor restraint files my @seq = bio::protein::sequence::read($seq); my %REST; foreach my $source_file (@$tbl) { use Cwd 'abs_path'; $source_file = abs_path($source_file); my @lines = util::readfile($source_file); @lines = grep {!/^\!/ and !/^\s*$/} @lines; my %iupac_lib;#library of aminoacid atoms my %nom = bio::protein::aminoacid::atom::nomenclature(); foreach my $aa (keys %nom) { my $aa_atoms = $nom{$aa}; foreach my $atom (@$aa_atoms) { my $name = $atom->{'iupac'}; my $ambig_regex = $atom->{'ambig_regex'}; if (defined $ambig_regex) { my @brothers;#vicinal atoms and like foreach my $a (@$aa_atoms) { my $n = $a->{'iupac'}; push @brothers, $n if $n =~ /^$ambig_regex$/; } $iupac_lib{$aa}{$name} = \@brothers; } else { $iupac_lib{$aa}{$name} = [$name]; } } } #translate iupac library to xplor library my %xplor_ambiguation_lib; foreach my $aa (keys %iupac_lib) { next if $aa eq 'X'; foreach my $i_a (keys %{$iupac_lib{$aa}}) { my $x_a = bio::protein::aminoacid::atom::translate(-from=>'iupac', -to=>'xplor', -aminoacid=>$aa, -atom=>$i_a, -table=>\%nom); my $i_atoms = $iupac_lib{$aa}{$i_a}; my @x_atoms = map { bio::protein::aminoacid::atom::translate(-from=>'iupac', -to=>'xplor', -aminoacid=>$aa, -atom=>$_, -table=>\%nom)} @$i_atoms; $xplor_ambiguation_lib{$aa}{$x_a} = \@x_atoms; } } my $xplor_atom_table = bio::protein::aminoacid::getatomtable('xplor'); while (@lines) { my $line = shift(@lines); if ($line =~ /ASSI/i) { # @lines has restraint lines in it # $res1 and $res2 are intervals #1 and #2 # each interval might be either one residue #[.] # or interval # my $rest = xplor::parse_rest(\@lines,$line); my $ambig = xplor::ambiguate_restraint($rest,\%xplor_ambiguation_lib, $xplor_atom_table,\@seq); $ambig->{file} = $source_file; my $error_message; if (not xplor::restraint_is_valid($ambig,\$error_message)) #check for missing data that would indicate that restraint could not be #parsed correctly { xplor::ambiguate_restraint_debug($rest,\%xplor_ambiguation_lib, $xplor_atom_table,\@seq); app::message("could not parse the following restraint:"); xplor::print_restraint($rest); app::message("either sequence is not compatible with restraint file(s) ". "or you have a non-standard aminoacids and/or other molecules"); app::message($error_message); exit 1; } my $rest_key = xplor::gen_restraint_key($ambig); if (not defined $REST{$rest_key}) { $REST{$rest_key} = [] } push @{$REST{$rest_key}},$ambig; } } } my @SELECTED_RESTRAINTS; foreach my $key (keys %REST) { my $alt = $REST{$key}; my $uplim = xplor::get_upper_limit($alt->[0]); my $weakest = $alt->[0]; shift @$alt; foreach my $a (@$alt) { if (xplor::get_upper_limit($a) < $uplim) { $weakest = $a; } } push @SELECTED_RESTRAINTS, $weakest; } my %RESTRAINT_GROUPS; foreach my $restraint (@SELECTED_RESTRAINTS) { my $file = $restraint->{file}; push @{$RESTRAINT_GROUPS{$file}}, $restraint; } foreach my $group (keys %RESTRAINT_GROUPS) { my $RESTRAINTS = $RESTRAINT_GROUPS{$group}; foreach my $R (@$RESTRAINTS) { xplor::print_restraint($R); print "\n"; } } } sub pdbseg { my $self = shift; my $arg = $self->{argv}; my $pdb = util::array::insure($arg->{-pdb}); my $list = $arg->{-list}; my $from = util::array::insure($arg->{-from}); my $to = $arg->{-to}; my $dir = $arg->{-dir}; if ($list eq 'true' and (defined $from or defined $to or defined $dir)) { app::error("If -list is used, then -to, -from, and -dir cannot. ". "Use -list to see what segments are available in pdb files, ". "and dont use it when you want to renumber residues"); } my %from; if (defined $from or defined $to or defined $dir) { if (not (defined $from and defined $to and defined $dir)) { app::error("options -from, -to, and -dir must be used together"); } else { my $n = scalar(@$from); if ($n/2 != int($n/2)) { app::error("parameter -from must have even number of values. ". 'For example: "-from dnaA 100 dnaB 120" will add 100 '. 'to residue numbers from segment dnaA, etc.'); } %from = @$from; my @incr = values %from; my @bad = grep !/^[+-]?[1-9]\d*$/, @incr; if (scalar @bad > 0) { app::error("increment values must be integer numbers, positive or negative"); } } } else { if ($list eq 'false') { app::error("Either use -list or -from, -to and -dir to specify action"); } } my %files; foreach my $f (@$pdb) { my @lines = util::readfile($f); if (grep /^MODEL/, @lines) { app::error("problem reading file $f, pdbseg does not ". "support bundle pdb files"); } $files{$f} = \@lines; } my %seg;#key - segment name value - range of residues e.g. 10-55 foreach my $f (keys %files) { my $lines = $files{$f}; foreach (@$lines) { next if not /^(ATOM|HETATM)/; my $resnum = util::line::peel(substr($_,22,4)); my $segment = substr($_,72,4); push @{$seg{$f}{$segment}}, $resnum; } foreach my $s (keys %{$seg{$f}}) { my $nums = $seg{$f}{$s}; my $min = util::array::min($nums); my $max = util::array::max($nums); my $range = util::line::parserange("$min-$max"); $seg{$f}{$s} = {}; $seg{$f}{$s}{'range'} = "$min-$max"; $seg{$f}{$s}{'gaps'} = 'false'; if (scalar(@$range) != $max-$min+1) #check if there are gaps in the sequence { $seg{$f}{$s}{'gaps'} = 'true'; } } } my @files = keys %files; my $first = $files[0]; foreach my $file (@files) { if (not util::hash::equal($seg{$first},$seg{$file})) { app::error("pdb files $first and $file are not compatible, ". "hey have different segments/residue ranges"); } } my $S = $seg{$first}; if ($list eq 'true') { my $format = "%8s%10s%10s\n"; print "\n"; printf $format, 'segment','residues','comment'; foreach my $s (sort keys %$S) { my $msg; if ($S->{gaps} eq 'true') { $msg = 'gaps in the sequence'; } printf $format,"'$s'",$S->{$s}{range},$msg; } print "\n"; } else { #check that requested segments exist in the list my @pdb_segs = map {if ($_ !~ /^\s+$/){util::line::peel($_)} else {$_}} keys %$S; my @argv_segs = map {if ($_ !~ /^\s+$/){util::line::peel($_)} else {$_}} keys %from; foreach my $argv_seg (@argv_segs) { if (not util::array::isin($argv_seg,\@pdb_segs)) { app::error("Segment '$argv_seg' was not found in pdb files. ". "FYI: available segments are: '".join("', '",@pdb_segs)."'"); } } mkdir $dir if not -e $dir; app::error("$dir already exists and is not a directory") if not -d $dir; chdir $dir; foreach my $file (@files) { my $lines = $files{$file}; for (my $i=0;$i< scalar(@$lines);$i++) { next if $lines->[$i] !~ /^(ATOM|HETATM)/; my $oldres = substr($lines->[$i],22,4); my $oldseg = substr($lines->[$i],72,4); for my $argv_seg (@argv_segs) { if ($oldseg eq $argv_seg) { substr($lines->[$i],22,4) = sprintf "%4d", $oldres + $from{$argv_seg}; substr($lines->[$i],72,4) = sprintf "%-4s", $to; last; } } } util::writefile($lines,$file,'force'); } } } sub calcpck { my $self = shift; my $arg = $self->{argv}; my $hetnuc = $arg->{-hetnuc}; my $fold = $arg->{-fold}; my $ref = $arg->{'-ref'}; my $sw = $arg->{-sw}; my $covalent = $arg->{-covalent}; my $ass1 = $arg->{-ass1}; my $res1 = $arg->{-res1}; my $assnom1 = $arg->{-assnom1}; my $assfmt1 = $arg->{-assfmt1}; my $ass2 = $arg->{-ass2}; my $res2 = $arg->{-res2}; my $assnom2 = $arg->{-assnom2}; my $assfmt2 = $arg->{-assfmt2}; my $pdb = $arg->{-pdb}; my $pdbnom = $arg->{-pdbnom}; my $novalidation = $arg->{-novalidation}; my $cutoff = $arg->{-cutoff}; my $axisorder = $arg->{-axisorder}; if ($covalent eq 'true' and defined $ass2) { app::error("-covalent and -ass2 cannot be used together. ". "If -ass2 is used, then intermolecular peaks will be calculated, which ". "is incompatible with -covalent option"); } if (defined $res2 and not defined $ass2) { app::error("parameter -res2 must be used together with -ass2"); } if (defined $ref or defined $sw) { if (not(defined $ref and defined $sw)) { app::error("parameters -ref and -sw must be used together"); } } if ($novalidation eq 'true') { $pdbnom = 'none'; $assnom1 = 'none'; $assnom2 = 'none'; } if (defined $res1) { $res1 = util::line::parserange($res1); if (not defined $res1) { app::error("-res1 value must be a range expression (e.g '1,3,5,15-45,99')"); } } if (defined $res2) { $res2 = util::line::parserange($res2); if (not defined $res2) { app::error("-res2 value must be a range expression (e.g '1,3,5,15-45,99')"); } } if (join('',sort {$a<=>$b} split /|/, $axisorder) ne '123') { app::error("-axisorder value must be a permutation of 123, ". "$axisorder found\n"); } my ($ASS1, $ASS2); $ASS1 = nmr::readass($ass1,$assfmt1,$assnom1); if ($fold ne 'none') { internal::resonance::fold($ASS1,$ref,$sw,$hetnuc); } if (defined $ass2) { $ASS2 = nmr::readass($ass2,$assfmt2,$assnom2); if ($fold ne 'none') { internal::resonance::fold($ASS2,$ref,$sw,$hetnuc); } } my $str = bio::structure::readpdb($pdb,$pdbnom); #here calculations of intramolecular and intermolecular peaks diverge if (not defined $ass2) { $ASS2 = $ASS1; $res2 = $res1; } bio::structure::addnmrass($str,$ASS1); bio::structure::addnmrass($str,$ASS2); bio::structure::calcbonds($str);#it is an overkill to calculate all bonds my $atoms1 = bio::structure::getprotons($str,$res1); my $atoms2 = bio::structure::getprotons($str,$res2); my @contacts; foreach my $a1 (@$atoms1) { foreach my $a2 (@$atoms2) { next if $a2 eq $a1; #use some sort of getmindist function here my $dist = bio::structure::getmindist($str,$a1,$a2,'SUM'); if ($dist <= $cutoff) { push @contacts, [$a1,$a2]; } } } my @peaks; #calculate axis order index so that for example $ind{3} would return #array index for heteroatom etc. #assuming that $ind{1} is atom connected to geteroatom, $ind{2} #is second proton my @ord = split /|/,$axisorder; my %ind; for (my $i=0;$i<3;$i++) { $ind{$i} = $ord[$i]; } %ind = reverse %ind; my @err; my $id = 1; CONTACT: foreach my $c (@contacts) { my ($a1,$a2) = @$c; my $bonds = $a1->{bonds}; my @hetnuc = grep {$_->{atom} =~ /^$hetnuc/} @$bonds; my $cnt = scalar(@hetnuc); next if $cnt == 0;#maybe it is not right heteroatom if ($cnt > 1) { push @err, intenal::atom::text($a1) . ": $cnt bonds"; } my $h1 = $hetnuc[0]; #normal axis order is $a1,$a2,$hetatom which corresponds to 123 my @atoms = ($a1,$a2,$h1); my @new_peaks;#new peaks derived from current pair of atom sets for (my $i=0;$i<3;$i++)#hardcoded support for three dimensional spectra { my $catom = $atoms[$ind{$i+1}];#for accomodating alternative axis order my $shifts = $catom->{'shifts'}; next CONTACT if not defined $shifts; if (scalar(@new_peaks) > 0) #multiply the number of peaks { my @duplicate_peaks; for (my $i=1; $i{'pos'}}; my @ass = @{$peak->{'ass'}}; $p{'pos'} = \@pos; $p{'ass'} = \@ass; #duplicate each peak push @duplicate_peaks, \%p; } } push @new_peaks, @duplicate_peaks; } else #create empty peaks { foreach my $shift (@$shifts) { my %p; $p{id} = $id;$id++; $p{'pos'} = []; $p{'ass'} = []; push @new_peaks, \%p; } } #now number of new peaks must be multiple of number of shifts to be added my $mul = scalar(@new_peaks)/scalar(@$shifts); app::die("oops, internal error...") if ($mul != int($mul) and $mul > 0); for (my $i=0; $i[$i]; for (my $j = 0; $j<$mul; $j++) { my $peak = $new_peaks[$i+$j]; push @{$peak->{'pos'}}, $shift; my $aname = $catom->{'atom'}; my $resname = $catom->{'residue'}; my %ass = ('atom'=>$aname,'residue'=>$resname); push @{$peak->{'ass'}},\%ass; } } } push @peaks, @new_peaks; $id++; } #HERE pipp::writepeaks('STDOUT',\@peaks); } sub dnapipp { my $self = shift; my $arg = $self->{argv}; my $seq = $arg->{-seq}; my $atomfilter = $arg->{-atoms}; #this data is taken from file #/data8/users/mohamad/programs/pipp/bin/peptide_dna.par my %bases = ( g=>{'name'=>'GUA','atoms'=>[qw ( O5' C5' H5' H5'' H5X C4' H4' O4' H1' C1' C2' H2' H2'' H2X C3' H3' O3' N9 C4 N3 C2 N2 HN' HN'' HNX N1 H1 C6 O6 C5 N7 C8 H8) ]}, a=>{'name'=>'ADE','atoms'=>[qw ( O5' C5' H5' H5'' H5X C4' H4' O4' H1' C1' C2' H2' H2'' H2X C3' H3' O3' N9 C4 N3 C2 H2 N1 C6 N6 HN' HN'' HNX C5 N7 C8 H8) ]}, t=>{'name'=>'THY','atoms'=>[qw ( O5' C5' H5' H5'' H5X C4' H4' O4' H1' C1' C2' H2' H2'' H2X C3' H3' O3' N1 C6 H6 C2 O2 N3 H3 C4 C5 CM H51 H52 H53 H5#) ]}, c=>{'name'=>'CYT','atoms'=>[qw ( O5' C5' H5' H5'' H5X C4' H4' O4' H1' C1' C2' H2' H2'' H2X C3' H3' O3' N1 C6 H6 C2 O2 N3 C4 N4 HN' HN'' HNX C5 H5) ]} ); #this line is to fix syntax highlighting in vim my @junk= qw(g' d); # read in sequence file my @seq = bio::protein::sequence::read($seq); print <{argv}; my $seq = $arg->{-seq}; my $prot = $arg->{-prot}; my @seq = util::readfile($seq); my @prot = util::readfile($prot); my @SEQ;#processed sequence my $res_no = 0; my $firstline = $seq[0]; chomp $firstline; my $seq_fmt = undef; if ($firstline =~ /^[A-Z]{3}\s+\d+\s*$/) { $seq_fmt = 'num'; } elsif ($firstline =~ /^[A-Z]{3}\s*$/) { $seq_fmt = 'auto'; } else {app::error("could not understand first line in file $seq, it has to be in xeasy format")} foreach my $s (@seq) { $s = util::line::peel($s); my $res_name; if ($seq_fmt eq 'num') { $s =~ /^([A-Z]{3})\s+(\d+)$/; $res_name = $1; $res_no = $2; } elsif ($seq_fmt eq 'auto') { $s =~ /^([A-Z]{3})/; $res_name = $1; $res_no++; } else { app::die("oops ... problem in the code"); } $SEQ[$res_no] = $res_name; } $SEQ[0] = scalar(@SEQ); @SEQ = bio::protein::sequence::format(-seq=>\@SEQ,-style=>'one_letter'); my @ass; foreach my $p (@prot) { my $p = util::line::peel($p); my @bits = split /\s+/, $p; my $ppm = $bits[1]; my $atom_name = $bits[3]; my $res_num = $bits[4]; my %resonance; $resonance{atom} = $atom_name; $resonance{'shift'} = $ppm; $resonance{residue} = $SEQ[$res_num].$res_num; push @ass, \%resonance; } pipp::saveass(\@ass,'STDOUT','ignore'); } sub xeasy2pk { my $self = shift; my $arg = $self->{argv}; my $ass = $arg->{-prot}; my $peak = $arg->{-peak}; my $seq = $arg->{-seq}; my $out = $arg->{-out}; app::error('function not yet implemented') } sub shiftmap { my $self = shift; my $ppm1 = $self->{argv}{-ppm1}; my $ppm2 = $self->{argv}{-ppm2}; my $fmt1 = $self->{argv}{-fmt}; my $fmt2 = $self->{argv}{-fmt2}; my $nom1 = $self->{argv}{-nom}; my $nom2 = $self->{argv}{-nom2}; my $pdbnom = $self->{argv}{-pdbnom}; my $atomregex = $self->{argv}{-atoms}; my $pdb = $self->{argv}{-pdb}; my $out = $self->{argv}{-out}; my $str = bio::structure::readpdb($pdb,$pdbnom); if (scalar @$str > 1) { app::error("pdb file contains >1 model; please provide one-model pdb file"); } bio::structure::pdb::zerobfactors($str); if (not defined $fmt2) { $fmt2 = $fmt1; } if (not defined $nom2) { $nom2 = $nom1; } #read two ass tables my $ass1 = nmr::readass($ppm1,$fmt1,$nom1); my $ass2 = nmr::readass($ppm2,$fmt2,$nom2); my $list1 = internal::resonance::organize($ass1); my $list2 = internal::resonance::organize($ass2); my @res = keys %$list1; foreach my $res (@res) { if (defined $list2->{$res}) { foreach my $group (keys %{$list1->{$res}}) { my $atoms1 = $list1->{$res}{$group}; my $atoms2 = $list2->{$res}{$group}; if (util::array::isa($atoms1) && util::array::isa($atoms2)) { if (scalar(@$atoms1) > 0 && scalar(@$atoms2) > 0) { my @a1 = grep {$_->{atom} =~ /^$atomregex/} @$atoms1; my @a2 = grep {$_->{atom} =~ /^$atomregex/} @$atoms2; my $shift1 = util::array::sum([map { $_->{'shift'} } @a1]); my $shift2 = util::array::sum([map { $_->{'shift'} } @a2]); $shift1 /= scalar @$atoms1; $shift2 /= scalar @$atoms2; my $delta = $shift2 - $shift1; my @allatoms; foreach my $a (@a1,@a2) { my $glob = bio::aminoacid::globatoms($a,'iupac'); push @allatoms, @$glob; } my @names = map {$_->{atom}} @allatoms; my $all = util::array::uniq(\@names); foreach my $atom (@$all) # set bfactor for each atom of current group # in the current aminoacid to $delta { bio::structure::pdb::setbfactor($str, $res, $atom, $delta); } } } } } } bio::structure::writepdb($str,$out,$pdbnom); my $model = $str->[0]; my @res = sort { my ($junk,$no1) = bio::protein::sequence::idres($a); my ($junk,$no2) = bio::protein::sequence::idres($b); $no1<=>$no2} keys %$model; my %bfac; foreach my $r (@res) { my @atoms = keys %{$model->{$r}}; my @bfac; foreach my $a (@atoms) { next if ($a !~ /^$atomregex/); my $bfac = $model->{$r}{$a}{bfac}; if (defined $bfac) { push @bfac, $bfac; } } my $sum = 0; foreach my $b (@bfac) { $sum += $b*$b; } if (scalar @bfac > 0) { $bfac{$r} = sqrt($sum)/scalar(@bfac); } } foreach my $r (@res) { print "$r\t$bfac{$r}\n"; } } sub prtdihvio { my $self = shift; my $in = $self->{argv}{-in}; my @lines = util::readfile($in); my %viol; while (util::array::ff(\@lines,'^FILE')) { my $line = shift @lines; $line =~ /^FILE= (.*)$/; my $file = $1; my @viol; while (scalar(@lines) > 0 and $lines[0] !~ /^FILE/) { push @viol, shift(@lines); } $viol{$file} = \@viol; } my %rest; foreach my $file (keys %viol) { my @viol = @{$viol{$file}}; while (util::array::ff(\@viol,' ===========')) { shift @viol; my @rest = splice(@viol,0,6); my $line6 = util::line::peel(pop @rest); my $line5 = util::line::peel(pop @rest); @rest = map {util::line::peel($_)} @rest; my @dihedral = map { my @tmp = split /\s+/, $_; {residue=>"$tmp[1]$tmp[0]",atom=>$tmp[2],resno=>$tmp[0]} } @rest; $_->{'residue'} = bio::protein::aminoacid::format(-res=>$_->{'residue'}, -style=>'one_letter') foreach @dihedral; my @bits = split(/\s+/,$line5); my ($angle,$rest,$viol) = ($bits[1],$bits[7],$bits[9]); @bits = split(/\s+/,$line6); my $range = $bits[1]; #Example: #Dihedral= -96.649 Energy= 0.013 C= 1.000 Equil= -60.000 Delta= 6.649 #Range= 30.000 Exponent= 2 my $key = internal::dihedral::text(\@dihedral); push @{$rest{$key}}, {file=>$file,dihed=>$angle,rest=>$rest, viol=>$viol, atoms=>\@dihedral }; } } if (scalar(keys %rest)>0) { print util::line::toupper("\n#viol restraint ave dist(viol) ". " stdev assignment\n\n"); foreach my $key (sort {scalar(@{$rest{$b}})<=>scalar(@{$rest{$a}})} keys %rest) { my @instances = @{$rest{$key}}; my @files = map {$_->{file}} @instances; my @viol = map {$_->{viol}} @instances; my $viol_mean = -1 * util::array::mean(\@viol); my $viol_std = util::array::std(\@viol); my $dist = $instances[0]->{dist}; my $rest = $instances[0]->{rest}; my $num = scalar(@instances); my $sign = util::scalar::sign($viol_mean); printf " %-6d%-20s%6.2f(%s%.2f)%11.2f %-35s\n\n", $num,$rest,$dist,$sign,$viol_mean,$viol_std,$key; } } } sub showrest { my $self = shift; my $arg = $self->{argv}; my $in = $arg->{-in}; my $res = $arg->{-res}; my @lines = util::readfile($in); @lines = grep {!/^\!/ and !/^\s*$/} @lines; util::assert(sub{scalar(@$res)==2},'exactly two aminoacid ranges are required'); my ($res1,$res2) = @$res; util::assert(sub{$res1 =~ /^\d+(-\d+)?$/}, 'first range not understood (use e.g.: "3" or "5-41")'); my @rest; while (@lines) { my $line = shift(@lines); if ($line =~ /ASSI/i) { # @lines has restraint lines in it # $res1 and $res2 are intervals #1 and #2 # each interval might be either one residue [.] # or interval my $rest = testrest(\@lines,$res1,$res2); if (defined $rest) { print $line; print join('',@$rest); print "\n"; } } } sub grabsel { my $input = shift; my $out; #here i assume that first line starts with opening parenthesis app::die("premature end of file") if scalar(@$input) == 0; if ($input->[0] !~ /^\s*\(/) { app::die("first line in selection must start with '(': '$input->[0]'"); } my $paren = 0; my $started = 0; SEARCH: while (@$input) { my $line = shift @$input; chomp $line; $out .= ' ';# add one empty space just in case my @C = split /|/, $line; while (@C) { my $char = shift @C; if ($char eq '(') { $paren++; $started = 1; } elsif ($char eq ')') { $paren--; } if ($paren<0) { app::die("too many right parentheses in $line"); } elsif ($paren == 0) { if ($started) { my $last = join('',@C); if ($last =~ /\S/) { unshift @$input, $last."\n"; } $out .= $char; last SEARCH; } } $out .= $char; } } $out =~ s/\s+/ /g; return $out; } sub testrest { my ($input,$res1,$res2) = @_; # prepare output my @OUT; for (my $i=0; $i[$i] !~ /ASSI/i; $i++) { push @OUT, $input->[$i]; } my $sel1 = grabsel($input); my $sel2 = grabsel($input); # check if restraint matches search criteria my @res = ($res1,$res2); my $atom1 = getatom($sel1); my $atom2 = getatom($sel2); my $num1 = getnum($sel1); my $num2 = getnum($sel2); my @num = ($num1,$num2); my @at = ($atom1,$atom2); my $first = $OUT[0]; @OUT = ($OUT[-1]); unshift @OUT, $sel2."\n"; unshift @OUT, $sel1."\n"; unshift @OUT, $first; my $same=0; $same = 1 if interval_inrange(\@res,\@num,\@at); shift @$input; while (@$input and $input->[0] =~ /OR/ and $input->[0] !~ /ASSI/i) { shift @$input; my $atom1 = getatom($input->[0]); my $atom2 = getatom($input->[1]); my $num1 = getnum(shift(@$input)); my $num2 = getnum(shift(@$input)); my @num = ($num1,$num2); my @at = ($atom1,$atom2); $same = 1 if interval_inrange(\@res,\@num,\@at); } # return restraint lines if it matched search criteria return \@OUT if $same; return undef; } sub readrange # read range as provided in command line arguments and form two # element array from it { my $in = shift; my @range; if ($in =~ /^\d+(\.[A-Z]{1,2}\d{0,2}[#%]?)?$/) # one residue matching make fake two element array { @range = ($in,$in); } elsif ($in =~ /^\d+\-\d+$/) # two residue matching # produce sorted in ascending order array with residue numbers { @range = sort {$a<=>$b} split( /-/, $in); } else { app::die("incorrect range $in"); } return @range; } sub wk2regex { my $wk = shift; if ($wk =~ /^(.*)\%$/) { return "^$1.\$"; } if ($wk =~ /^(.*)\#$/) { return "^$1.*\$"; } return $wk; } sub inrange { my ($what,$atom,$where) = @_; my ($num1,$atom1) = parse_def($where->[0]); my ($num2,$atom2) = parse_def($where->[1]); if ($what >= $num1 and $what <= $num2) { if (defined $atom1) { my $regex = wk2regex($atom1); if ($atom =~ /$regex/) { return 1; } return 0; } elsif (defined $atom2) { my $regex = wk2regex($atom2); if ($atom =~ /$regex/) { return 1; } return 0; } else { return 1; } } return 0; } sub parse_def { my $what = shift; $what =~ /^(\d+)(\.[A-Z]{1,2}\d{0,2}[#%]?)?$/; my $num = $1; my $atom = $2; $atom =~ s/^\.//; return ($num,$atom); } sub interval_inrange { # ar2 array with atom numbers read in from the aria restraint # lines # $at array with corresponding atom names my ($ar1,$ar2,$at) = @_; return 0 if scalar(@$ar1) != 2; return 0 if scalar(@$ar2) != 2; my @range1 = readrange($ar1->[0]); my @range2 = readrange($ar1->[1]); # try matching atoms to ranges in two ways return 1 if (inrange($ar2->[0],$at->[0],\@range1) and inrange($ar2->[1],$at->[1],\@range2)); return 1 if (inrange($ar2->[0],$at->[0],\@range2) and inrange($ar2->[1],$at->[1],\@range1)); # returning zero if matching in both ways failed return 0; } sub getatom { my $in = shift; $in =~ /name\s+([^\s]+)\s+/; my $atom = $1; app::die("cannot parse $in") if not defined $atom; return $atom; } sub getnum { my $in = shift; $in =~ /resid\s+(\d+)\s+/; my $num = $1; app::die("cannot parse $in") if not defined $num; return $num; } } sub prtvio { my $self = shift; my $in = $self->{argv}{-in}; my @lines = util::readfile($in); my %viol; while (util::array::ff(\@lines,'^FILE')) { my $line = shift @lines; $line =~ /^FILE= (.*)$/; my $file = $1; my @viol; while (scalar(@lines) > 0 and $lines[0] !~ /^FILE/) { push @viol, shift(@lines); } $viol{$file} = \@viol; } my %rest; foreach my $file (keys %viol) { my @viol = @{$viol{$file}}; while (util::array::ff(\@viol,'set-i-atoms')) { shift @viol; my $atom_lines1 = util::array::grabff(\@viol,'set-j-atoms'); my @group1 = map { # dnaA 10 ADE H1' my $segid = util::line::peel(substr($_,10,4)); my $res_num = util::line::peel(substr($_,15,4)); my $res_name = util::line::peel(substr($_,20,4)); my $atom = util::line::peel(substr($_,25,4)); my $residue = $res_name . $res_num; {segid=>$segid,residue=>$residue,atom=>$atom,resno=>$res_num} } @$atom_lines1; shift @viol; my $atom_lines2 = util::array::grabff(\@viol,'R<.*>='); my @group2 = map { my $segid = util::line::peel(substr($_,10,4)); my $res_num = util::line::peel(substr($_,15,4)); my $res_name = util::line::peel(substr($_,20,4)); my $atom = util::line::peel(substr($_,25,4)); my $residue = $res_name . $res_num; {segid=>$segid,residue=>$residue,atom=>$atom,resno=>$res_num} } @$atom_lines2; my $info = shift @viol; $info = util::line::peel($info); my @bits = split(/\s+/,$info); my ($dist,$rest,$viol) = ($bits[1],"$bits[3]$bits[4]$bits[5]$bits[6]",$bits[8]); #3.30(-1.50/+0.00) $rest =~ /^($NUMPATTERN)$-($NUMPATTERN)\/($NUMPATTERN)$/; my $rest_min = sprintf "%4.2f", $1 - $2; my $rest_max = sprintf "%4.2f", $1 + $3; $rest = "$rest_min...$rest_max"; my $key = join('<->', sort (util::line::peel(internal::assignment::text(\@group1)), util::line::peel(internal::assignment::text(\@group2)))); push @{$rest{$key}}, {file=>$file,dist=>$dist,rest=>$rest, viol=>$viol, group1=>\@group1, group2=>\@group2, }; } } if (scalar(keys %rest)>0) { print util::line::toupper("\n#viol xplor restraint pdb dist+/-stddev". " violation assignment\n\n"); foreach my $key (sort {scalar(@{$rest{$b}})<=>scalar(@{$rest{$a}})} keys %rest) { my @instances = @{$rest{$key}}; my @files = map {$_->{file}} @instances; my @viol = map {$_->{viol}} @instances; my @dist = map {$_->{dist}} @instances; my $viol_mean = -1 * util::array::mean(\@viol); my $viol_std = util::array::std(\@viol); my $dist_std = util::array::std(\@dist); my $dist = util::array::mean(\@dist); my $rest = $instances[0]->{rest}; my $num = scalar(@instances); my $sign = util::scalar::sign($viol_mean); printf " %-6d %-17s%6.2f+/-%4.2f%8s%.2f %s\n\n", $num,$rest,$dist,$dist_std,$sign,$viol_mean,util::line::peel($key); #withouk std #printf " %-6d%-20s%6.2f%8s%.2f%-35s\n\n", # $num,$rest,$dist,$sign,$viol_mean,$key; } } } sub calibrate_noe { my $self = shift; my $arg = $self->{argv}; my $pdb = $arg->{-pdb}; my $nom = $arg->{-nom}; my $pck = $arg->{-pck}; my $range = $arg->{-range}; my $atomregex = $arg->{-atoms}; my $strong = $arg->{-strong}; my $graph = $arg->{-graph}; my $xplor = $arg->{-xplor}; my $keepcov = $arg->{-keepcov}; my $w = $arg->{'-w'}; my $m = $arg->{'-m'}; my $s = $arg->{'-s'}; my $A = $arg->{'-A'}; my $B = $arg->{'-B'}; my $exp = $arg->{'-exp'}; my $bin = $arg->{'-bin'}; my $cont = $arg->{'-cont'}; my $uplim = $arg->{'-uplim'}; my $nolib = $arg->{'-nolib'}; util::assert(sub {ref $pck ne 'ARRAY'},'only one .PCK file allowed'); my @lines = util::readfile($pck); @lines = grep { !/^#/ } @lines; while ($lines[0] !~ /^VARS/ and scalar(@lines) > 1) { shift @lines; } shift @lines; my @pairs; # determine type of action my $action; if (not ($strong eq 'true' xor $graph eq 'true' xor $xplor eq 'true')) { app::error('either one (and only one) -xplor, -strong, or -graph option must be used '. 'to define type of action'); } else { $action = 'strong' if $strong eq 'true'; $action = 'graph' if $graph eq 'true'; $action = 'xplor' if $xplor eq 'true'; } # determine type of calibration my $cal; if (not ($bin eq 'true' xor $cont eq 'true' xor defined $uplim)) { if ($action eq 'graph' or $action eq 'strong') { $cal = 'cont'; } else { app::error("either one (and only one) calibration type -bin, ". "-cont, or -uplim must be specified"); } } else { $cal = 'bin' if $bin eq 'true'; $cal = 'cont' if $cont eq 'true'; $cal = 'uplim' if defined $uplim; } my $bin_auto = undef; if (defined $w or defined $m or defined $s) { if ($cal ne 'bin') { app::error("options -w, -m and -s are to be used with -bin ". "(i.e. binned calibration) only\n"); } else #binned calibration requested { if (not (defined $w and defined $m and defined $s)) { app::error("options -w, -m and -s must be used together"); } else { $bin_auto = 0; } } } elsif ($cal eq 'bin') { $bin_auto = 1; } #determine compatibility of type of action and type of calibration if ($action eq 'strong' or $action eq 'graph') { if (not defined $pdb) { app::error("-pdb must be used together with option \-$action"); } if ($cal ne 'cont') { app::error("option \-$cal is not compatible with \-$action"); } } elsif ($action eq 'xplor') { if (defined $pdb) { if ($cal ne 'cont') { app::error("options \-$cal and -pdb cannot be used together"); } } else { if ($cal eq 'cont') { app::error("option -pdb is required if -cont is used"); } else { if (defined $A) { app::error("option -A is not compatible with \-$cal"); } if (defined $B) { app::error("option -B is not compatible with \-$cal"); } } } } if (defined $pdb) { if (not defined $nom) { app::error("atom naming type in pdb files (xplor, pdb, or iupac) ". "must be specified with -nom"); } } #------ read pck file # use Cwd 'abs_path'; my $pck_path = abs_path($pck); my $peak_input_order = 0; foreach my $line (@lines) { next if $line =~ /^\s+$/; $line = util::line::peel($line); my @bits = split(/\s+/,$line); my $D = undef; my $rg = "^$NUMPATTERN\$"; if ($bits[4] =~ $rg and $bits[5] !~ $rg) { $D = 2; } elsif ($bits[5] =~ $rg and $bits[6] !~ $rg) { $D = 3; } elsif ($bits[6] =~ $rg and $bits[7] !~ $rg) { $D = 4; } my ($id,$intensity,$atom1,$atom2) = ($bits[0],abs($bits[$D+2]),$bits[$D+3], $bits[$D+4]); next if $atom1 =~ /\*\*/ or $atom2 =~ /\*\*/; next if $intensity == 0; $atom1 =~ s/;//g; $atom2 =~ s/;//g; $peak_input_order++; my @comments; push @pairs, {id=>$id,atom1=>$atom1,atom2=>$atom2,intensity=>$intensity, line=>$line,comments=>\@comments,peak_input_order=>$peak_input_order,file=>$pck_path}; } # #------ read pck file # set aminoacid range my ($start,$end) = (1,100000); if (defined $range) { util::assert(sub {$range =~ /^\d+-\d+$/}, '-range value must be '. 'given as two numbers separated by dash, e.g 39-45'); ($start,$end) = sort {$a<=>$b} split(/-/,$range); } my $struc; # determine distances if (defined $pdb) { my @fmt = qw(pdb iupac xplor); util::assert(sub{util::array::isin($nom, \@fmt)}, '-nom must be one of '. join(',',@fmt)); app::message("reading pdb files...") if $xplor eq 'false'; $struc = bio::structure::readpdb($pdb,$nom); foreach my $pair (@pairs) { my $DIST = bio::structure::getMinDistByLabel($struc, $pair->{atom1},$pair->{atom2},'SUM'); $pair->{distance} = $DIST; } } my $eq; #calibration equation used later if ($cal eq 'cont') # determine noe calibration based on given pdb files { my $np = scalar(@pairs); if ($np < 3) { app::error("only $np assigned noe's found in file $pck and noe calibration ". "is impossible"); } app::errmessage("$np useable assigned peaks found in file $pck"); my @sample = grep {inscope($_,[$start,$end],$atomregex)} @pairs; my $ns = scalar @sample; if ($ns < 3) { app::error("only $ns noe's were selected for calibration". " assign more signals or/and relax -range/-atoms parameters"); } app::errmessage("$ns signals were selected for calibration"); if (defined $pdb) { @pairs = grep {$_->{distance}>0} @pairs; } @pairs = grep {defined $_->{intensity}} @pairs; if ($exp < 0) { app::error("-exp value must be entered as positive number"); } if (not defined $A and not defined $B) { my @int = map {$_->{intensity}} @pairs; my $min = util::array::min(\@int); my @short = grep {$_->{distance} <= 2} @sample; my @int = map {$_->{intensity}} @short; my $max = util::array::max(\@int); my $min_dist = 1.8; my $max_dist = 7.0; $A = ($max-$min)/($min_dist**(-$exp) - $max_dist**(-$exp)); $B = $max - $A * $min_dist**(-$exp); } elsif (defined $A xor defined $B) { app::error("-A and -B must be used simultaneously"); } my $s = util::scalar::sign($B); $eq = sprintf "%6.2e*x**(-%3.2f) %s %6.2e", $A, $exp, $s, abs($B); app::errmessage("Equation int(r) = $eq will be used for intensity calculation"); } if ($xplor eq 'true') #produce xplor distance restraint file (option -xplor) { my %noe; # determine noe signal levels for binned intensity calibration my ($VWEAK,$WEAK,$MED);#VWEAK is very weak, strong are implicit - stronger then MED if ($cal eq 'bin') { if ($bin_auto) { my @int = sort {$a<=>$b} map {$_->{intensity}} @pairs; my $num = scalar(@int); $VWEAK = $int[int(0.2*$num)]; $WEAK = $int[int(0.5*$num)]; $MED = $int[int(0.8*$num)]; } else #notise there is a mismatch of terminology for variables #$VWEAK, $WEAK and $MED have slightly different meaning #vs $w,$m,$s { $VWEAK = $w; $WEAK = $m; $MED = $s; } } OUTPUT: foreach my $pair (@pairs) { my $atom1 = $pair->{atom1}; my $atom2 = $pair->{atom2}; my $gr1 = xplor::globatoms($atom1,$nolib); my $gr2 = xplor::globatoms($atom2,$nolib); next if not defined $gr1 or not defined $gr2; $pair->{atom_group1} = $gr1; $pair->{atom_group2} = $gr2; #ignore noe's from protons separated by less then 4 bonds if ($nolib eq 'false' and $keepcov eq 'false') { foreach my $a1 (@$gr1) { foreach my $a2 (@$gr2) { my @bits1 = split /|/, $a1->{atom}; my @bits2 = split /|/, $a2->{atom}; #here ord() is a perl function that returns #numerical (ordinal) value of a character my $atom_ord1 = util::line::toupper($bits1[1]); my $atom_ord2 = util::line::toupper($bits2[1]); if (abs(util::scalar::greek_ord($atom_ord1)- util::scalar::greek_ord($atom_ord2)) < 2) { next OUTPUT if ($a1->{residue} eq $a2->{residue}); } } } } #use empirical curve and add 0.5 A to methyls #add 0.2A to amides #add 0.4A to amide-amide my $offset = 0; if (bio::aminoacid::atom::ismethyl($gr1)) { $offset += 0.5; } elsif (bio::aminoacid::atom::isamid($gr1) and scalar(@$gr1)==1) { $offset += 0.2; } if (bio::aminoacid::atom::ismethyl($gr2)) { $offset += 0.5; } elsif (bio::aminoacid::atom::isamid($gr2) and scalar(@$gr2)==1) { $offset += 0.2; } my $I = $pair->{intensity}; $pair->{offset} = $offset; my $upper_limit; if ($cal eq 'bin') { $upper_limit = classify($I,$VWEAK,$WEAK,$MED) + $offset; if ($upper_limit > 6.0) { $upper_limit = 6.0; } } elsif ($cal eq 'cont') { $upper_limit = (($I-$B)/$A)**(-1/$exp) + $offset; if ($upper_limit eq 'nan' or $upper_limit > 6) { $upper_limit = 6; } } elsif ($cal eq 'uplim') { $upper_limit = $uplim; } my $contact = makelabel($gr1,$gr2); #absolute upper limit is 6.0 angstroms $pair->{upl} = $upper_limit; push @{$noe{$contact}},$pair; } #find best signal for each contact #by looking for the most conservative estimate #upper limit distance #print contact in xplor format and also print commented out label #to weed out contacts from different spectra foreach my $contact (keys %noe) { my $pairs = $noe{$contact}; my $best = $pairs->[0]; my $smallest_intensity = abs($pairs->[0]{intensity}); foreach my $pair (@$pairs) { $best = $pair if ($pair->{upl} > $best->{upl}); if (abs($pair->{intensity} < $smallest_intensity)) { $smallest_intensity = abs($pair->{intensity}); } } my $dist = sprintf "%4.2f", $best->{upl}; my $comments = $best->{comments}; push @$comments, "peak id $best->{id}, upper limit distance $dist, intensity $smallest_intensity"; xplor::print($best); } } elsif ($strong eq 'true') { my @strong = grep { $_->{intensity} > $A*($_->{distance})**(-$exp) + $B } @pairs; map { $_->{violation} = $_->{distance} - (($_->{intensity} - $B)/$A)**(-1/$exp) } @strong; print "peak viol dist int atom1 atom2\n"; foreach my $pair (sort {$b->{violation} <=> $a->{violation}} @strong) { my $atom1 = $pair->{atom1}; my $atom2 = $pair->{atom2}; my $dist = $pair->{distance}; my $int = $pair->{intensity}; my $viol = $pair->{violation}; my $id = $pair->{id}; printf "%-6s%6.2f%6.2f%11.2e%15s%15s\n",$id,$viol,$dist,$int,$atom1,$atom2; } return; } elsif ($graph eq 'true') { use File::Temp; my ($plot,$filename) = File::Temp::tempfile(); my @int = map {$_->{intensity}} @pairs; my $max = util::array::max(\@int); foreach my $pair (@pairs) { print $plot $pair->{distance}, "\t", $pair->{intensity}, "\n"; } close $plot; open GP, "| gnuplot" or app::error("cannot start gnuplot: $!"); my $fh = select(GP); $| = 1; select($fh); print "\n"; app::message("running gnuplot:"); my $cmd = "set xrange [0:7];set yrange [0:$max]\n"; print GP $cmd; chomp $cmd; app::message($cmd); $cmd = "plot '$filename', $eq\n"; print GP $cmd; print(" ",$cmd); print "\n"; app::message("NOTE: use -strong option to print out the list of restraints ". "above the solid line"); sleep 1000000; } sub classify { my ($int,$vw,$w,$med) = @_; return 6.0 if $int < $vw; return 5.0 if util::scalar::inrange($int,[$vw,$w]); return 3.3 if util::scalar::inrange($int,[$w,$med]); return 2.7; } sub makelabel { my ($group1,$group2) = @_; my $from = join(',', sort(map {$_->{atom} . '@' . $_->{residue}} @$group1)); my $to = join(',', sort(map {$_->{atom} . '@' . $_->{residue}} @$group2)); return join('-',sort($from,$to)); } sub inscope { my ($pair,$range,$atomregex) = @_; if ($pair->{atom1} =~ /^[A-Z]+(\d+)\./) { return 0 if not util::scalar::inrange($1,[$start,$end]); } if ($pair->{atom2} =~ /^[A-Z]+(\d+)\./) { return 0 if not util::scalar::inrange($1,[$start,$end]); } if (defined $atomregex) { my $group1 = xplor::globatoms($pair->{atom1}); my $group2 = xplor::globatoms($pair->{atom2}); return 0 if not defined $group1 or not defined $group2; my @atoms1 = map {$_->{atom}} @$group1; my @atoms2 = map {$_->{atom}} @$group2; return 0 if not ( util::array::somematch(\@atoms1,$atomregex) and util::array::somematch(\@atoms2,$atomregex) ); } return 1; } } sub delExp { my $self = shift; my $argv = $self->{argv}; my $file = $argv->{-dat}; my $spectrum = $argv->{-spectrum}; my $sp = internal::spectrum::read($file); delete $sp->{spectra}{$spectrum}; delete $sp->{parameters}{$spectrum}; internal::spectrum::write($sp,$file,'force'); } sub importseq { my $self = shift; my $sp = internal::spectrum::init($self->{argv}{-dat}); my @seq = bio::protein::sequence::read($self->{argv}{-seq}); internal::spectrum::setdata($sp,'SEQUENCE',\@seq); internal::spectrum::write($sp,$self->{argv}{-dat},'force'); } sub trest { my $self = shift; my $pdb = $self->{argv}{-pdb}; my $nom = $self->{argv}{-nom}; my $in = $self->{argv}{-in}; my $cutoff = $self->{argv}{-cutoff}; my $specno = util::array::insure($self->{argv}{-specno}); my %specno; foreach my $spec (@$specno) { util::assert(sub{$spec =~ /^[^=]+=\d+$/}, '= format '. 'expected for -specno values,'. " $spec found"); my @pair = split /=/, $spec; $specno{$pair[1]} = $pair[0]; } my $tmp = scalar(@$specno); my %tmp = reverse %specno; util::assert(sub{$tmp == scalar(keys %specno) and $tmp == scalar(keys %tmp)}, 'names and aria assigned numbers to spectra given with -specno parameter '. 'must be unuique'); if (defined $pdb) { util::assert(sub{defined $nom},'-nom must be used together with -pdb'); my @fmt = qw(pdb xplor iupac); util::assert(sub{util::array::isin($nom,\@fmt)}, 'value of -nom must be one of '. util::line::reportopt(\@fmt)); $pdb = bio::structure::readpdb($pdb,$nom); } my $sp = internal::spectrum::read($in); my @names = internal::spectrum::names($sp); util::assert(sub{util::array::somein(\@names,['cnoe','nnoe'])}, "file $in does not have noe spectra (named cnoe or ". "nnoe)"); util::assert(sub{util::array::allin([values %specno],\@names)}, 'some spectra with names given with -specno parameter are not '. "present in the $in datafile (try -inspect)"); my $ass = internal::spectrum::getdata($sp,'ASS'); util::assert(sub{defined $ass},"file $in does not have ". "assignment table"); my $seq = internal::spectrum::getdata($sp,'SEQUENCE'); util::assert(sub{defined $ass},"file $in does not contain ". "protein sequence"); app::message("Enter restraint in aria format and enter Ctrl-D:"); my @input = ; my $rest = nmr::aria::restraint::parsenoe(\@input); util::assert(sub{defined $rest->{peakno}},"Peak number not recognized in ". "restraint record"); # fix residue label to contain letter and serial number my $resid1 = $rest->{resid1}; $rest->{resid1} = $seq->[$resid1] . $resid1; my $resid2 = $rest->{resid2}; $rest->{resid2} = $seq->[$resid2] . $resid2; # pull the peak out of the peak table my $peakno = $rest->{peakno}; my $spectrum = $specno{$rest->{specno}}; my $peak = internal::spectrum::getpeak($sp,$spectrum,$peakno); util::assert(sub{defined $peak}, "peak $peakno not found in spectrum $spectrum"); # calculate all possible assignments # based on the assignment table internal::peak::noeass($peak,$sp); my $alt_ass = $peak->{noeass}; my @ALT; foreach my $noeass (@$alt_ass) { my $symm = internal::spectrum::peak::findSymmByAss($sp, ['cnoe','nnoe'],$peak,$noeass); my %alt; $alt{symm} = $symm; $alt{origin} = $noeass; push @ALT, \%alt; } if (scalar(@ALT) > 0) { app::message('Possible assignments:'); my $i = 1; foreach my $alt (@ALT) { my $noeass = $alt->{origin}; my $symm = $alt->{symm}; my $num = sprintf "%2d)", $i; my $dist; if (defined $pdb) { my $prot1 = internal::noe::anchorreson($sp,$spectrum,$noeass); my $prot2 = internal::noe::leadreson($sp,$spectrum,$noeass); $prot1 = bio::aminoacid::globatoms($prot1,'iupac'); $prot2 = bio::aminoacid::globatoms($prot2,'iupac'); $dist = bio::structure::getmindist($pdb,$prot1,$prot2,'SUM'); if (defined $cutoff) { next if $dist > $cutoff; } if (defined $dist) { my $delta = nmr::noe::restraint::violation($rest,$dist); $dist = sprintf "DISTANCE %4.2fA", $dist; if ($delta == 0) { $dist .= " VIOLATION 0.0A"; } else { $dist .= sprintf(" VIOLATION %5.2fA", $delta); } } else { $dist = "\t".'NO DISTANCE in pdb file'; } } # todo calculate assignment violation app::message($num . internal::assignment::text($noeass) . $dist); $i++; if (defined $symm and scalar(@$symm) > 0) { print ' '; app::message('Symmetry peak(s) found:'); foreach my $s (@$symm) { # todo calculate symmetry violation print ' '; app::message('* '. internal::peak::text($s)); } } else { print ' '; #my $location = internal::peak::new::fromAss($noeass,$sp); my $location = internal::peak::findSymmLocByAss($noeass,$sp); my $at = internal::peak::text::pos($location); app::message('No symmetry peaks around ' . $at); } } } else { app::message('No assignment found for the peak'); } } sub findclose { my $self = shift; my $arg = $self->{argv}; my $ave = util::line::toupper($arg->{-ave}); my $pdb = $arg->{-pdb}; my $at = $arg->{-at}; my $nom = $arg->{-nom}; my $cutoff = $arg->{-cutoff}; my $nstr = $arg->{-nstr}; util::assert(sub{util::array::isin($ave,['SUM','AVE'])}, 'averaging must be either SUM or AVE'); if (ref $at eq 'ARRAY') { app::error("only one atom/group allowed for -at parameter"); } $at = util::line::toupper($at); util::assert( sub{$at=~/^[A-Z]\d+\.[A-Z][ABGDEHNZ]?\d{0,2}[#%]?$/}, "atom label $at not understood ". "see help more information on input format"); my $str = bio::structure::readpdb($pdb,$nom); my $group = xplor::globatoms($at); util::assert(sub{scalar(@$group)>0},"no atoms found for $at"); #todo translate atoms to iupac here foreach my $atom (@$group) { my $res = $atom->{residue}; my ($residue,$num) = bio::protein::sequence::idres($res); my $name = bio::protein::aminoacid::atom::translate( -from=>'xplor', -to=>'iupac', -atom=>$atom->{atom}, -aminoacid=>$residue ); $atom->{atom} = $name; } my $contacts = bio::structure::findcontacts($str,$group,$cutoff,$nstr,$ave); my $GR1 = join(',',map {internal::atom::text($_)} @$group); if (scalar(@$contacts)>0) { print "\n"; foreach my $contact (sort {$a->{dist}<=>$b->{dist}} @$contacts) { my $GR2 = join(',',map {internal::atom::text($_)} @{$contact->{atoms}}); printf "$GR1<----->$GR2 %-6.2fA\n", $contact->{dist}; } print "\n"; } else { app::message("No close contacts found in file $pdb"); } } sub dist { my $self = shift; my $arg = $self->{argv}; my $ave = util::line::toupper($arg->{-ave}); my $pdb = $arg->{-pdb}; my $at = $arg->{-at}; my $nom = $arg->{-nom}; util::assert(sub{util::array::isin($ave,['SUM','AVE'])}, 'averaging must be either SUM or AVE'); util::assert(sub{ref $at eq 'ARRAY' and scalar(@$at) == 2}, 'two atom or atom group designator expected'); my @at = map {util::line::toupper($_)} @$at; foreach my $a (@at) { util::assert( sub{$a=~/^[A-Z]\d+\.[A-Z][ABGDEHNZ]?\d{0,2}[#%]?$/}, "atom label $a not understood ". "see help more information on input format"); } my $str = bio::structure::readpdb($pdb,$nom); my $DIST = bio::structure::getMinDistByLabel($str,$at->[0],$at->[1],$ave); my $group1 = xplor::globatoms($at[0]); my $group2 = xplor::globatoms($at[1]); if (defined $DIST) { print "\n"; my $GR1 = join(',',map {internal::atom::text($_)} @$group1); my $GR2 = join(',',map {internal::atom::text($_)} @$group2); print util::line::wrap(-line=>"Atom group1 $GR1"),"\n"; print util::line::wrap(-line=>"Atom group2 $GR2"),"\n"; print util::line::wrap(-line=>sprintf "Distance %-6.2f Angstroms", $DIST), "\n\n"; } else { app::message("Requested distance not found in file $pdb"); } } sub hbond { my $self = shift; my $in = $self->{argv}{-in}; my @lines = util::readfile($in); foreach my $line (@lines) { my @bits = split /\s+/, $line; my $out = 'assign ( residue '.$bits[0].' and name '.$bits[2].' ) '; $out .= '( residue '.$bits[3].' and name '.$bits[5].' ) 1.8 0.0 0.5'."\n"; $out .= 'assign ( residue '.$bits[0].' and name N ) '; $out .= '( residue '.$bits[3].' and name '.$bits[5].' ) 2.8 0.0 0.5'."\n"; print $out; } } sub ass2tbl { my $self = shift; my $in = $self->{argv}{-in}; my $fmt = $self->{argv}{-fmt}; my $nom = $self->{argv}{-nom}; my $wildcards = [sort @{util::array::insure($self->{argv}{-atoms})}]; my @fmt = qw(sparky pipp); util::assert(sub{util::array::isin($fmt,\@fmt)}, "unexpected value of -fmt, must be either sparky or pipp"); my @nom = qw(iupac xplor); util::assert(sub{util::array::isin($nom,\@nom)}, "unexpected value of -nom, must be either iupac or xplor"); my $ass = nmr::readass($in,$fmt,$nom); util::print(\$ass); my %residues; # sort assignments by residue into a new data structure foreach my $as (@$ass) { my $r = $as->{residue}; push @{$residues{$r}}, $as; } # convert wildcards into regexes my @err; foreach (@$wildcards) { if ($_ !~ /^[HCN][OABGDEHZN]?[123]?$/) { push @err, "atom wildcard $_ not understood"; } if ($_ =~ /^[HCN]$/) { $_ = '^' . $_ . '$'; } else { $_ = '^'. $_ . '[123]{0,2}$'; } } app::error(\@err) if scalar(@err) > 0; # print the header line my $w = 9; printf "%-$w\s", 'res'; foreach my $wildcard (@$wildcards) { $wildcard =~ /\^([^\[]+)\[123\]\{0\,2\}\$/; my $label = $1; printf "%-$w\s", $label; } print "\n"; foreach my $r (sort {util::cmpaa($a,$b)} keys %residues) { printf "%-$w\s", $r; ATOM: foreach my $wildcard (@$wildcards) { # print an entry if $_ matches any atom # print empty space otherwise my $found = 0; foreach my $at (@{$residues{$r}}) { if ($at->{atom} =~ /$wildcard/) { printf "\%-$w\.2f", $at->{'shift'}; next ATOM; } } print util::line::blank($w); } print "\n"; } } sub asssparky2pipp { my $self = shift; my $argv = $self->{argv}; my $in = $argv->{-in}; my $out = $argv->{-out}; my $ifmt = $argv->{-nom}; my $ofmt = $argv->{-onom} || $ifmt; my $chir = $argv->{-chir}; my @nom = qw(iupac xplor); util::assert(sub{util::array::allin([$ifmt,$ofmt],\@nom)}, 'values of both -ifmt and -ofmt must be either '. 'xplor or iupac'); my $ass = nmr::readass($in,'sparky',$ifmt); if ($chir eq 'true') { foreach my $r (@$ass) { my $name = $r->{atom}; my $aa = $r->{residue}; my $num; ($aa,$num) = bio::protein::sequence::idres($aa); $name = bio::protein::aminoacid::atom::scramble($name,$aa); $r->{atom} = $name; } } if ($ofmt ne 'iupac') { foreach my $r (@$ass) { my $name = $r->{atom}; my @ibits = split /\|/,$name; my @obits; foreach my $ibit (@ibits) { my $obit = bio::protein::aminoacid::atom::translate( -from=>'iupac', -to=>$ofmt, -atom=>$ibit, -aminoacid=>$r->{residue} ); push @obits, $obit; } $r->{atom} = join('|',@obits); } } nmr::writeass($ass,$out,'pipp'); } sub pksparky2pipp { my $self = shift; my $in = $self->{argv}{-in}; my $out = $self->{argv}{-out}; my $dims = $self->{argv}{-axes}; my $label = $self->{argv}{-label}; $dims = [] if not defined $dims; $label = [] if not defined $label; $dims = util::array::insure($dims); $label = util::array::insure($label); my $peaks = sparky::readpeaks($in); my $dim = internal::peak::dim($peaks->[0]); util::assert(sub{defined $out},'file name must be given '. 'with parameter -out'); if (scalar(@$dims) == 0) { my @pipp = qw(x y z a); my @sparky = qw(w1 w2 w3 w4); for (my $i=0;$i<$dim;$i++) { $dims->[$i] = $pipp[$i] .'='. $sparky[$i]; } } if (scalar(@$label) == 0) { if ($dim > 2) { app::message("cannot automatically determine label format, ". "use -label variable (also check -help)\n"); } $label->[0] = 'w1'; $label->[1] = 'w2'; } util::assert(sub{$dim == @$dims}, "spectrum $in is $dim\D, so -axes parameter ". "must be given for exactly $dim dimensions" ); util::assert(sub{util::array::allmatch($dims, '^[xyzaXYZA]=[wW][1-4]$')}, '-axes parameter value must be in format:'. 'x= y= etc. For example: '. 'x=w2 y=w1 x=w4 a=w3'); util::assert(sub{util::array::allmatch($label, '^[wW][1-4]$')}, '-label parameter value must be one or more of w1, w2, w3, w4'. 'For example: -label w1 w2'); my @PAX = qw(X Y Z A); my %axtbl; foreach my $ax (@$dims) { $ax =~ /([xyzaXYZA])=[wW]([1-4])/; my $pippdim = util::line::toupper($1); my $sparkydim = $2; $axtbl{$pippdim} = $sparkydim; } # determine format string my $format = "%4d %3d "; my @dimfmt; for (my $i=0; $i<$dim; $i++) { push @dimfmt, '%7.2f'; } $format .= join(' ',@dimfmt); $format .= " %+8.2e %s %s\n"; my @lines; push @lines, "FORMAT $format"; push @lines, "VARS PkID\n"; my $id = 1; foreach my $peak (@$peaks) { my $pos = $peak->{'pos'}; my @coor; for (my $i=0; $i<$dim; $i++) { my $coor = $pos->[$axtbl{$PAX[$i]} - 1]; util::assert(sub{defined $coor}, 'please check your -axes command line parameter'); push @coor, $coor; } my $planeno = 0; my $intensity = $peak->{'intensity'}; my @ass; for my $L (@$label) { $L =~ /^[wW](\d)$/; my $d = $1; util::assert(sub{$d<=$dim},"bad dimension descriptor $d"); $d--; my $a = $peak->{ass}[$d]; push @ass, $a->{residue_label} . '.' . $a->{atom}; } my $blank = '****'; $ass[0] = $blank if $ass[0] eq '.'; $ass[1] = $blank if $ass[1] eq '.'; #util::print($peak->{ass}); if (scalar @ass != 2) { app::error("function only supports two-atom assignment labels"); } my $l = sprintf $format,$id++,$planeno,@coor,$intensity,@ass; push @lines, $l; } util::writefile(\@lines,$out); } sub assnm #todo delete this function { my $self = shift; my $arg = $self->{argv}; my $ifmt = $arg->{-ifmt}; my $ofmt = $arg->{-ofmt}; my $in = $arg->{-in}; my $out = $arg->{-out}; my $prog = $arg->{-prog}; util::assert(sub{ref $in ne 'ARRAY'}, 'only one input file allowed and '. util::line::reportlist(util::array::insure($in)). ' would be too many'); my $ass = nmr::readass($in,$prog,$ifmt); foreach my $atom (@$ass) { my $newatom = bio::protein::aminoacid::atom::translate( -atom=>$atom->{atom}, -aminoacid=>$atom->{residue}, -from=>$ifmt, -to=>$ofmt); $atom->{atom} = $newatom; } nmr::writeass($ass,$out,$prog); } sub assfmt { my $self = shift; my $arg = $self->{argv}; my $ifmt = $arg->{-ifmt}; my $ofmt = $arg->{-ofmt}; my $inom = $arg->{-inom}; my $onom = $arg->{-onom}; my $iseq = $arg->{-iseq}; my $in = $arg->{-in}; my $out = $arg->{-out}; my $reset = $arg->{'-reset'}; if ($ifmt eq 'cara') { $inom = 'cara'; } if ($ifmt eq 'cppn'){ $inom = 'cppn'; } if (not defined $inom) { app::error("parameter -inom is required with -ifmt $ifmt"); } if (not defined $onom and not defined $ofmt) { app::error('nothing to do: either -onom or -ofmt must be given') } $onom = $inom if not defined $onom; $ofmt = $ifmt if not defined $ofmt; util::assert(sub{ref $in ne 'ARRAY'}, 'only one input file allowed.'. util::line::reportlist(util::array::insure($in)). ' were given'); if (($ifmt eq 'xeasy' or $ifmt eq 'cara') and not defined $iseq) { app::error("sequence must be given with -iseq parameter ". "in order to read xeasy assignment table"); } if (defined $iseq and ($ifmt ne 'xeasy' and $ifmt ne 'cara')) { app::error("-iseq parameter must be used only in the combination with -ifmt xeasy or cara"); } my $ass = nmr::readass($in,$ifmt,$inom,$iseq); if (defined($reset) and $reset eq 'true') { $ass = nmr::resetresidues($ass); } nmr::ass::translate($ass,'iupac',$onom); nmr::writeass($ass,$out,$ofmt); } sub pdbnm { my $self = shift; my $arg = $self->{argv}; my $ifmt = $arg->{-ifmt}; my $ofmt = $arg->{-ofmt}; my $in = $arg->{-in}; my $out = $arg->{-out}; my $model = $arg->{-model}; util::assert(sub{ref $in ne 'ARRAY'}, 'only one input file allowed. And '. util::line::reportlist(util::array::insure($in)). ' would be too many'); util::assert(sub{ref $out ne 'ARRAY'}, 'only one output file allowed. And '. util::line::reportlist(util::array::insure($out)). ' would be too many'); my @par = ($ifmt,$ofmt); my @opt = ('pdb','xplor','iupac'); #util::assert($util::array::{allin},\@par,\@opt, # 'values of -ifmt and -ofmt must be one of '. # util::line::reportopt(\@opt)); my $str = bio::structure::readpdb($in,$ifmt); if (defined $model) { if ($model !~ /^\d+$/) { app::error("number expected with parameter -mod ". "'$model' found"); } my $max = scalar(@$str) - 1; util::assert($util::scalar::{inrange},$model-1,[0,$max], "requested model number $model is out of range ". "file $in contains only ". util::line::reportnum($max+1,'model')); $str = [$str->[$model-1]]; } bio::structure::writepdb($str,$out,$ofmt); } sub foldass { my $self = shift; my $arg = $self->{argv}; my $fmt = $arg->{-fmt}; my $in = $arg->{-in}; my $out = $arg->{-out}; my $ofmt = $arg->{-ofmt}; my $atoms = $arg->{-atoms}; my $nom = $arg->{-nom}; my $ref = $arg->{'-ref'}; my $sw = $arg->{-sw}; my $onom = $arg->{-onom}; $onom = $nom if not defined $onom; $ofmt = $fmt if not defined $ofmt; my @par = ($fmt,$ofmt); my @opt = ('pipp','sparky'); util::assert($util::array::{allin},\@par,\@opt, 'values of -ifmt and -ofmt must be one of '. util::line::reportopt(\@opt)); util::assert($util::scalar::{isnum},$ref, "parameter -ref must be a number"); util::assert($util::scalar::{isnum},$sw, "parameter -sw must be a number"); util::assert($util::array::{isin},$nom,['iupac','xplor'], 'value of -nom must be either iupac or xplor'); my $ass = nmr::readass($in,$fmt,$nom); internal::resonance::fold($ass,$ref,$sw,$atoms); if ($onom ne 'iupac') { nmr::ass::translate($ass,'iupac',$onom); } nmr::writeass($ass,$out,$ofmt); } sub asspipp2sparky { my $self = shift; my $arg = $self->{argv}; my $pipp = $arg->{-in}; my $sparky = $arg->{-out}; my $ass = pipp::readass($pipp); sparky::saveass($ass,$sparky); } sub mvexp { my $self = shift; my $argv = $self->{argv}; my $to = $argv->{-to}; my $from = $argv->{-from}; my $file = $argv->{-file}; my $sp = internal::spectrum::read($file); if (internal::spectrum::exists($sp,$from)) { internal::spectrum::rename($sp,$from,$to); internal::spectrum::write($sp,$file,'force'); } else { app::error("spectrum $from not found in dataset $file ". "try -inspect"); } } sub setaxis { my $self = shift; my $argv = $self->{argv}; my $name = $argv->{-spectrum}; my $file = $argv->{-file}; my $axis = $argv->{-axis}; if ($argv->{-fold} eq 'true') { if ($argv->{-unfold} eq 'true') { app::error('cannot use -fold and -unfold simultaneously'); } if (defined $argv->{-lim}) { my $lim = $argv->{-lim}; $argv->{-lim} = [sort {$a<=>$b} @$lim]; if (not (ref $lim eq 'ARRAY' and scalar @$lim == 2)) { app::error("-lim requires two numbers for the upper and lower ". "limits of chemical shift"); } elsif (not util::array::isnum($lim)) { app::error("values " . util::word::join($lim) . ' of -lim parameter are not understood '. '- two NUMBERS expected'); } } } my %d = internal::spectrum::defaults::axis(); my @allowed = keys %{$d{xyz2num}}; if (util::array::isin($axis,\@allowed)) { my $sp = internal::spectrum::read($file); if (internal::spectrum::exists($sp,$name)) { if ($argv->{-fold} eq 'true' and not defined $argv->{-lim}) { my $ax = internal::spectrum::getaxispar($sp,$name); if (not defined $ax->{hippm} or not defined $ax->{loppm}) { app::error('axis limits are required for '. 'folded dimension, use -lim '); } } internal::spectrum::setaxispar($sp,$argv); internal::spectrum::write($sp,$file,'force'); } } else { app::error("axis $axis not understood. Allowed axes are ". util::word::join(@allowed). '.'); } } sub setExpPar { my $self = shift; my $argv = $self->{argv}; my $par = $argv->{-par}; my $val = $argv->{-val}; my $name = $argv->{-spectrum}; my $file = $argv->{-file}; if (ref $file eq 'ARRAY') { app::error("only one file allowed for -setpar"); } my $SP = internal::spectrum::read($file); if (internal::spectrum::exists($SP,$name)) { internal::spectrum::setpar($SP,$name,$par,$val); util::savedata($SP,$file,'force'); } else { app::error("spectrum $name does not exist in $file try using -inspect"); } } sub inspect { my $self = shift; my $argv = $self->{argv}; my $file = $argv->{-file}; my $exp = $argv->{-spectrum}; if (ref $file eq 'ARRAY') { app::error("only one datafile allowed for ". "inspection at a time"); } my $SP = internal::spectrum::read($file); my @spectra; if (defined $exp) { if (internal::spectrum::exists($SP,$exp)) { push @spectra, $exp; internal::spectrum::printpar($SP,$exp); internal::spectrum::printaxes($SP,$exp); } else { app::error("spectrum $exp does not exist in $file->[0]"); } } else { @spectra = internal::spectrum::names($SP); } foreach my $spectrum (@spectra) { my @peaks = internal::spectrum::peaks($SP,$spectrum); my $bounds = internal::spectrum::bounds($SP,$spectrum); my $lo = internal::peak::new(); internal::peak::setcoor($lo,$bounds->[0]); my $hi = internal::peak::new(); internal::peak::setcoor($hi,$bounds->[1]); $lo = internal::peak::text($lo); $hi = internal::peak::text($hi); printf "%-20s%5d peaks\n",$spectrum, scalar(@peaks); print "ppm limits $lo - $hi\n"; } my $userdata = internal::spectrum::userdata::names($SP); if (scalar(@$userdata) > 0) { print "Other data available:\n"; foreach my $set (@$userdata) { print $set, "\n"; } } } sub filtPeaks { my $self=shift; my $argv = $self->{argv}; my $method = $argv->{-method}; my $in = $argv->{-in}; my $out = $argv->{-out}; # read peaks my $in = util::array::insure($in); my $SP = internal::spectrum::read(@$in); # do stuff my @methods = qw(isolated showdup); if (util::array::isin($method,\@methods)) { internal::spectrum::filter($SP,$method,$argv); } else { app::die("internal: method $method not understood"); } util::savedata($SP,$out); } sub noefilt { my $self=shift; my $argv = $self->{argv}; my $method = $argv->{-method}; my $in = $argv->{-in}; my $out = $argv->{-out}; my $ass = $argv->{-ass}; # read peaks my $in = util::array::insure($in); my $SP = internal::spectrum::read(@$in); my @methods = qw(integrated symm nosymm width mergesymm); if ($method eq 'ass') { my $symm = internal::spectrum::filter::ass($SP,$ass); internal::spectrum::setdata($SP,'noesymmetry',$symm); } elsif ($method eq 'dist') { my $pdb = $argv->{-pdb}; my $str; my $str = bio::structure::readpdb($pdb); my $ass = sparky::readass($argv->{-ass},'strict'); my $sym = internal::spectrum::getdata($SP,'noesymmetry'); my $prochiral_are_unambiguous = $argv->{-dst}; if ($prochiral_are_unambiguous eq 'true') { app::die('feature not implemented'); } my %accepted; my %rejected; my %undefined; KEY: foreach my $key (keys %$sym) { my $pairs = $sym->{$key}; #$accepted{$key} = [] if not defined $accepted{$key}; #$rejected{$key} = [] if not defined $rejected{$key}; #$undefined{$key} = [] if not defined $undefined{$key}; foreach my $pair (@$pairs) { my $a1 = $pair->[0]; my $a2 = $pair->[1]; my $peak1 = $a1->{peak}; my $peak2 = $a2->{peak}; my $ass1 = $a1->{ass}; my $ass2 = $a2->{ass}; my $spec1 = internal::peak::whatspectrum($peak1); my $spec2 = internal::peak::whatspectrum($peak2); # atomdef structure contains atom and residue my $atomdef1 = internal::noe::anchorreson($SP,$spec1,$ass1); my $atomdef2 = internal::noe::anchorreson($SP,$spec2,$ass2); # todo important fix this plug maybe # this is ok but error is upstream my @r = map {$_->{residue}} ($atomdef1,$atomdef2); next if ( bio::protein::sequence::badresidue($r[0]) or bio::protein::sequence::badresidue($r[1]) ); my $atom1 = bio::aminoacid::getambigatoms($atomdef1); my $atom2 = bio::aminoacid::getambigatoms($atomdef2); # generate all possible couples from arrays @$atom1 and @$atom2 my $possible_assignments = util::array::tuples($atom1,$atom2); my $cutoff = $argv->{-cutoff}; my $dist = $cutoff + 1;# outside range foreach my $couple (@$possible_assignments) { my ($atom1,$atom2) = @$couple; my $newdist = bio::structure::getmindist( $str, $atom1, $atom2 ); if (not defined $newdist) { push @{$undefined{$key}}, $pair; next KEY; } $dist = $newdist if $newdist < $dist; } if ($dist <= $cutoff) { push @{$accepted{$key}},$pair; } else { push @{$rejected{$key}},$pair; } } } util::print('accepted',scalar(keys %accepted)); util::print('rejected',scalar(keys %rejected)); util::print('undefined',scalar(keys %undefined)); internal::spectrum::setdata($SP,'accepted',\%accepted); internal::spectrum::setdata($SP,'rejected',\%rejected); internal::spectrum::setdata($SP,'undefined',\%undefined); } elsif (util::array::isin($method,\@methods)) { internal::spectrum::filter($SP,$method); } else { app::error("method $method not understood"); } util::savedata($SP,$out); } sub run { my ($self,$args) = @_; $self = bless {}; use File::Basename; $self->{program} = basename $0; #----- deal with help ------------------------ # if (not defined $args->[0]) { $self->printmenu(); exit; } if ($args->[1] eq '-help')# -fun -help { my $tmp = $args->[0]; $args->[0] = $args->[1]; $args->[1] = $tmp; } if ($args->[0] eq '-help')# { if (scalar(@$args) > 2)# input for help is too long { shift @$args; shift @$args; my $msg = util::line::reportlist($args,'argument', 'unexpected'); app::error($msg); } elsif (scalar(@$args) == 1)# asking for general help { $self->help('-help'); } else # help to a particular function { $self->{argv}{$args->[0]} = 'true'; $self->help($args->[1]); } exit; } # #---- #---- run the program ------ # if (not defined $fun{$args->[0]}) { app::error("function '$args->[0]' is unknown"); } my $er = $ERROR_REPORTING; $ERROR_REPORTING = 0; $self->parsearg($args); $ERROR_REPORTING = $er; $self->{cmd} = $args->[0]; $fun{$args->[0]}{'fun'}->($self); } sub psymmnoe { my $self = shift; my $argv = $self->{argv}; my $file = $argv->{-in}; my $pdb = $argv->{-pdb}; my $aria = $argv->{-orest}; my $spectra = util::array::insure($argv->{-spectra}); my $base = $argv->{-base}; my $chir = $argv->{-chir}; # some assertions before we get started my $sp; util::assert::nosuchfile($aria); if (not defined $spectra) { $sp = internal::spectrum::read($file); # todo here filter out non-noe spectra so that # their peaks are not used # maybe have a list of allowable noe spectra my @tmp = internal::spectrum::names($sp); my @spectra = map { $_ . '.list' } @tmp; util::assert::nosuchfile(\@spectra); $spectra = \@tmp; } else { util::assert::nosuchfile($spectra); $sp = internal::spectrum::read($file); } my $symm = internal::spectrum::getdata($sp,'noesymmetry'); my $fun = 'blah'; if (not defined $symm) { util::error("$fun requires symmetry search result ". "that seems absent from dataset $file ". 'see -noesymm function with -method options '. '\'symm\' and \'ass\''); } # assign a unique number to each peak my $id=0; internal::spectrum::apply( $sp,$spectra, sub{$_[0]->{id} = ${$_[1]}++}, \$id ); my $str; if (defined $pdb) { $str = bio::structure::readpdb($pdb); } my @aria; foreach my $key (keys %$symm) { my $pairs = $symm->{$key}; # todo important here handle ambiguous restraints correctly!!! # probably i actually need to fix 'ambigous atom expander' my @usable; foreach my $pair (@$pairs) { my ($p1,$p2) = @$pair; # two hoops to jump through # 1) distance match if pdb file is given my $dist; if (defined $pdb) { $dist = internal::noe::getdist($p1, $p2,$str,$sp); next if $dist > 5.0; next if not defined $dist; } # 2) at least one volume must be defined my $vol1 = internal::peak::getvol($p1->{peak}); my $vol2 = internal::peak::getvol($p2->{peak}); next if not defined $vol1 and not defined $vol2; # now save good pair into the list for output push @usable, $pair; } push @aria, internal::noe::aria(\@usable,$sp,$chir) ."\n"; # mark those peaks as used map { map {$_->{peak}{mark} = 'used'} @$_ } @usable; } util::writefile(\@aria,$aria); exit; foreach my $spec (@$spectra) { my @peaklist; my $peaks = internal::spectrum::getpeaks($sp,$spec); my @usable; foreach my $peak (@$peaks) { next if $peak->{mark} eq 'used'; next if not defined $peak->{noeass}; push @usable, $peak; } nmr::writepeaks($peaks,$spec . '.list'); } exit; } sub toolkit::crap::analyze { my $self = shift; my $argv = $self->{argv}; my $file = $argv->{-default}; my $p1 = $argv->{-p1}; my $p2 = $argv->{-p2}; my $sp = internal::spectrum::read($file); my $accepted = internal::spectrum::getdata($sp,'accepted'); my $rejected = internal::spectrum::getdata($sp,'rejected'); my $undefined = internal::spectrum::getdata($sp,'undefined'); my %contacts; foreach my $u (keys %$accepted) { my $list = $accepted->{$u}; foreach my $el (@$list) { my ($p1,$p2) = @$el; my $a1 = $p1->{ass}; my $a2 = $p2->{ass}; my $name1 = $p1->{peak}{spectrum}; $a1 = internal::noe::anchorreson($sp,$name1,$a1); my $name2 = $p2->{peak}{spectrum}; $a2 = internal::noe::anchorreson($sp,$name2,$a2); my $contact = join("\t",sort{util::cmpaa($a,$b)}($a1->{residue},$a2->{residue})); $contacts{$contact}++; } } my @contacts = sort {util::cmpaa($a,$b)} keys %contacts; map {print $_, "\n"} @contacts; exit; } sub help { my ($self,$fun) = @_; $self->printExtendedHelp($fun); exit; } sub message { my ($self,$msg) = @_; $msg = util::line::wrap('-line'=>$msg,'-indent'=>8,'-max'=>72); #my $prog = $self->{program}; print "\n$msg\n\n"; } sub printmenu { my %cat; foreach my $fun (keys %fun) { my $cat = $fun{$fun}{category}; if ($cat eq 'internal' or $cat eq 'DATA') { if (getlogin() ne $programmer) { next; } } my $category = $fun{$fun}{category} or app::die("internal: category not defined for function $fun"); $cat{$category}++; } foreach my $category (sort {$CAT{$a}{priority}<=>$CAT{$b}{priority}} keys %cat) { my $title = util::line::toupper($CAT{$category}{synopsis}); print "\n", util::line::wrap(-line=>$title,-indent=>15), "\n\n"; foreach my $fun (sort keys %fun) { next if $fun{$fun}{category} ne $category; my $line = $fun . ' ' . $fun{$fun}{'synopsis'}; print util::line::wrap(-line=>$line,-max=>80, -indent=>15,-firstword=>1); print "\n"; } } print "\n"; print("NOTE: type nt | more to read entire list of available functions"); print "\n\n"; } sub parsearg { my ($self, $args) = @_; my %arg; my $carg = '-default'; my $arg = [@$args];# copy the argument list my $fun = shift @$arg; if (not defined $fun{$fun}{par}) { if (scalar(@$arg) > 1) { app::error("function $fun does not require arguments. ". util::line::reportlist($arg,'argument','not understood.')); } elsif (scalar(@$arg == 1) and $arg->[0] ne '-help') { app::error("argument $arg->[0] not understood"); } } # 1. Deal with the default parameter my $default = $fun{$fun}{par}{default}; if (defined $default) { if ($arg->[0] =~ /^-/ or scalar(@$arg) == 0) # case where default nameless arument value (right after # the function swithch) was not specified by user { my $defaultval = $default->{default}; #last if not defined $defaultval; if (ref $default->{default} eq 'ARRAY') { $arg{'-default'} = $defaultval; } elsif (not ref $default->{default}) { $arg{'-default'} = [$defaultval]; } else { app::die("error: wrong value of ". "default parameter for $fun"); } } else # if it was specified { while (@$arg && $arg->[0] !~ /^-/) { my $val = shift @$arg; push @{$arg{'-default'}}, $val; } } } elsif (@$arg and $arg->[0] !~ /^-/) # a case where user specified a parameter that looks like # the value of nameless parameter, but the function specification # does not contain default (nameless) parameter { app::error("unexpected parameter $arg->[0]"); } # 2. Parse remaining arguments into a hash of arrays while (@$arg) { my $bit = shift @$arg; if ($bit =~ /^\-([a-zA-Z][a-zA-Z\d]*)?$/) { if (not defined $1) { app::error("argument $bit not understood"); } $carg = $bit; $arg{$carg} = []; next; } push @{$arg{$carg}}, $bit; } # 3. Now make sure that arguments satisfy specifications # given by the %fun hash which is in the beginning of # the file my $required = $fun{$fun}{par}{required}; my $optional = $fun{$fun}{par}{optional}; my $hardcoded = $fun{$fun}{par}{hardcoded}; #todo rehash arguments into one table my @err; #todo validate arguments here my @unknown; my @all = keys %$required; push @all, keys %$optional; push @all, keys %$hardcoded; push @all, '-help';# this one must be handled by individual functions if (defined $fun{$fun}{par}{default}) { push @all, '-default'; } if ($fun ne '-help') { foreach my $carg (keys %arg) { if (not util::array::isin($carg,\@all)) { push @unknown, $carg; } } if (@unknown) { my $message = util::line::reportlist(\@unknown, 'argument', 'not understood'); app::error($message); } } # %arg is a hash with arguments to a function foreach my $carg (keys %arg) { my $n = scalar(@{$arg{$carg}});# number of values to $carg if ($n == 0 && $carg ne '-default')# detect flags { if (util::array::isin($carg,[keys %$required])) { app::error("required parameter $carg must ". "have a value"); } elsif (util::array::isin($carg,[keys %$optional])) { if ($fun{$fun}{par}{optional}{$carg}{type} ne 'flag') { app::error("parameter $carg must have a value"); } } elsif (util::array::isin($carg,[keys %$hardcoded])) { if ($fun{$fun}{par}{hardcoded}{$carg}{type} ne 'flag') { app::error("software error - internal parameter ". "$carg must have a value"); } } $arg{$carg} = 'true';# assign boolean value to flag #todo here must be a check on argument type } elsif ($n == 1) { # convert arrays with one element to scalars $arg{$carg} = $arg{$carg}[0]; } } if (defined $optional) { foreach my $key (keys %$optional) { if (exists $optional->{$key}{default}) { if (not defined $arg{$key}) { $arg{$key} = $optional->{$key}{default} } } else { next if $optional->{$key}{type} eq 'flag'; app::die("internal: must be default for $key"); } } } # hardcoded parameters override all user supplied ones if (defined $hardcoded) { foreach my $key (keys %$hardcoded) { $arg{$key} = $hardcoded->{$key}; } } my @missing_required; if (defined $required) { my @required = keys %$required; foreach my $try (@required) { my $found = 0; foreach my $carg (keys %arg) { if ($carg eq $try) { $found = 1; } } if (not $found) { push @missing_required, $try; } } } if (@missing_required) { my $message = util::line::reportlist([sort @missing_required], 'required argument/value pair', 'missing'); app::error($message); } $self->{argv} = \%arg; return; } sub printExtendedHelp { my ($self, $function) = @_; my $spec = $fun{$function}; #print synopsis print "\n", util::line::wrap(-line=>$spec->{synopsis}), "\n\n"; #print usage my $default = $spec->{par}{default}; my $required = $spec->{par}{required}; my $optional = $spec->{par}{optional}; my $cmd = "\t". $self->{program} . " $function"; my $details; if (defined $default) { my $name = $default->{alias} || app::die("oops"); $cmd .= " <$name>"; $details = "\n\tDefault parameter\n"; my $descr = util::line::wrap(-line=>$default->{description}, -indent=>15,-max=>80); $details .= sprintf "\t%-15s%s\n", "<$name>", $descr; } if (defined $required) { if (keys %$required) { $details .= "\n\tRequired parameters:\n"; } foreach my $key (sort keys %$required) { my $name = $key; $name =~ s/^-(.*)$/<\1VAL>/; $cmd .= " $key $name"; my $line = $name . ' ' . $required->{$key}; my $descr = util::line::wrap(-line=>$line); $details .= $descr . "\n"; } } if (defined $optional) { if (keys %$optional) { $details .= "\n\tOptional parameters:\n"; } foreach my $key (sort keys %$optional) { $key =~ /^-(.*)$/; my $name = "<$1VAL>"; my $det; if ($self->isflag($optional->{$key})) { $name = "<$key>"; $det = $key; $cmd .= " [$key]"; } else { $det = $name; $cmd .= " [$key $name]"; } my $line = $name . ' ' . $optional->{$key}{description}; my $descr = util::line::wrap(-line=>$line); $details .= $descr . "\n"; } } print util::line::wrap(-line=>"Usage: $cmd"), "\n"; print $details, "\n"; if (length($spec->{help}) > 0) { my $line = $spec->{help}; $line = util::line::wrap(-line=>$line,-max=>80,-indent=>8); print "$line\n\n"; } } sub isflag { my ($self,$par) = @_; return 0 if not defined $par->{type}; return 1 if $par->{type} eq 'flag'; } sub exportAss { my $self = shift; my $argv = $self->{argv}; my $in = $argv->{-in}; my $out = $argv->{-out}; my $format = $argv->{-format}; my $seq = $argv->{-seq}; my $ass = util::readdata($in); if (defined $seq) { $ass = internal::resonance::seqfilt($ass,$seq); } if ($format eq 'sparky') { app::error("feature not implemeted"); sparky::saveass($ass,$out); } elsif ($format eq 'pipp') { pipp::saveass($ass,$out); } } sub importAss { my $self = shift; my $argv = $self->{argv}; my $in = $argv->{-ass}; my $dat = $argv->{-dat}; my $format = $argv->{-fmt}; my $seq = $argv->{-seq}; my $nom = $argv->{-nom}; my $ass; if ($format eq 'sparky') { $ass = sparky::readass($in); } elsif ($format eq 'pipp') { $ass = pipp::readass($in); } if (defined $seq) { $ass = internal::resonance::seqfilt($ass,$seq); } foreach my $atom (@$ass) { $atom->{atom} = bio::protein::aminoacid::atom::translate( -atom=>$atom->{atom}, -aminoacid=>$atom->{residue}, -from=>$nom, -to=>'iupac'); } my $sp; if (-f $dat) { $sp = internal::spectrum::read($dat); } else { $sp = internal::spectrum::new(); } internal::spectrum::setdata($sp,'ASS',$ass); util::savedata($sp,$dat,'force'); } sub mergeRefinePeakCoor { my $self = shift; my $argv = $self->{argv}; my $list1 = $argv->{-list1}; my $list2 = $argv->{-list2}; my $axes = $argv->{-axes}; my $out = $argv->{-out}; my $axes2 = $argv->{-axes2}; if (ref $list1) { util::error("only one file permitted with -list1"); } if (ref $list2) { util::error("only one file permitted with -list2"); } my $sp1 = internal::spectrum::read(@$list1); my $sp2 = internal::spectrum::read(@$list2); my @pk1 = internal::spectrum::peaks($sp1); my @pk2 = internal::spectrum::peaks($sp2); $axes = [$axes] if (!ref $axes); my $dim = scalar(@{$pk1[0]->{pos}}); foreach my $ax (@$axes) { if ($ax !~ /^\d+$/) { app::error("-axes parameter only accepts numbers"); } if ($ax > $dim) { app::error("axis number in -axes for ". "$list1 must be <= $dim ($ax given)"); } } my %ax; #hash table with correspondence of axes if (not defined $axes2) # match -axes to axes of list2 { $axes2 = internal::spectrum::axis(\@pk2); #todo fix here!!! my %ax1 = internal::spectrum::center(\@pk1,$axes); my %ax2 = internal::spectrum::center(\@pk2,$axes2); my @ax1 = sort {$ax1{$a}<=>$ax1{$b}} keys %ax1; my @ax2 = sort {$ax2{$a}<=>$ax2{$b}} keys %ax2; if (scalar @ax1 != scalar @ax2) { app::error('could not guess which dimensions to use '. 'for -list2, please use -axes2 parameter'); } # assign correspondence of the axes %ax = util::array::hash(\@ax1,\@ax2); } else { $axes2 = [$axes2] if (!ref $axes2); if (scalar(@$axes) != scalar(@$axes2)) { app::error('-axes and -axes2 must have same '. 'number of values to match'); } else { my $dim = scalar(@{$pk2[0]->{pos}}); foreach my $ax (@$axes2) { if ($ax !~ /^\d+$/) { app::error("-axes parameter only ". "accepts numbers"); } if ($ax > $dim) { app::error("axis number in -axes for ". "$list2 must be <= $dim ". "($ax given)"); } } %ax = util::array::hash($axes,$axes2); } } #todo here - find closest peaks in list2 to the ones in list1 #using the %ax hashtable for axis correspondence } sub genmars #generate mars input peak table from .save files of sparky3.1 #allowed by mars atom assignments (CO-1, etc.) are hardcoded in #the end of this subroutine #this subroutine will ignore peaks with more then two residues involved #and will print a warning to STDERR { my @mars_atoms = qw(CA CB CO HA H N);#atoms that are included in mars program my $self = shift; my %argv = %{$self->{argv}}; my @mars_atoms_full = qw(CA CA-1 CB CB-1 CO CO-1 HA HA-1 H N); my %transl = (HN=>'H',NH=>'N',"C\'"=>'CO'); my @argv = @ARGV; my $fmt = $argv{-fmt}; if (not util::array::isin($fmt,['pipp','sparky'])) { app::error('value of -fmt parameter must be either \'pipp\' or \'sparky\''); } my @seq = bio::protein::sequence::read($argv{-seq}); shift @seq;# todo fix this plug my $exclude_atoms = util::array::insure($argv{-exclude}); if (scalar(@$exclude_atoms)>0) { if (not util::array::allin($exclude_atoms, \@mars_atoms)) { app::error('all values of -exclude parameter must one of '. 'the atoms recognized by mars: ' . join(', ',@mars_atoms)); } } #collect chemical shift information my @peaks; if ($fmt eq 'sparky') { my @files = glob '*.save'; foreach my $file (@files) { my $peaks = sparky::readpeaks($file); push @peaks, @$peaks; } #collect what kind of resonances there are #now we want to assemble pseudoresidues } elsif ($fmt eq 'pipp') { my @files = glob '*.PCK'; foreach my $file (@files) { my $peaks = sparky::readpeaks($file); push @peaks, @$peaks; } } toolkit::validate::peaks(\@peaks, 'silent'); my %PR;#pseudoresidues foreach my $peak (@peaks) { next if not defined $peak->{ass}; #collect residues involved my %res; my %atoms; foreach my $ass (@{$peak->{ass}}) { $res{$ass->{residue_label}} = $ass;#to remember example of ass $atoms{$ass->{atom}}++; } my @atoms = translate_atoms([keys %atoms], \%transl); next if not util::array::somein(\@atoms, \@mars_atoms); #pack peaks into pseudoresidues, and parse them later my @res = keys %res; foreach my $root (@res) { #do not use not fully unidentified resonances as root next if not defined $res{$root}{ass_type}; push @{$PR{$root}{peaks}}, $peak; if (not defined $PR{$root}{ass}) { #note that atom name is obsolete here $PR{$root}{ass} = $res{$root}; } else { #todo probably this is not necessary??? my $old = $PR{$root}{ass}; my $new = $res{$root}; util::dump({message=>'oops...program died', old=>$old,new=>$new, root=>$root}) if not util::hash::equal( $old,$new, [ qw( seq_pos residue_name ass_type residue_label ) ]); } } } my $res = internal::resonance::calculate(\@peaks); #use Data::Dumper; #print Dumper($res),"\n";exit; my $num = 1; #parse pseudoresidues foreach my $root (keys %PR) { my $peaks = $PR{$root}{peaks}; my $root_ass = $PR{$root}{ass}; my $root_seq_pos = $root_ass->{seq_pos}; #filter atoms from each of the peaks and include them #into output if they match mars's criteria #1) atom types belong to mars #2) residue is either root or root-1 foreach my $peak (@$peaks) { my @ass = @{$peak->{ass}}; foreach my $ass (@ass) { my $label = toolkit::getlabel($ass); my $frequency = $res->{$label}{center}; my @atoms; push @atoms, $ass->{atom}; my @atoms = translate_atoms([$ass->{atom}],\%transl); next if !util::array::allin(\@atoms,\@mars_atoms); next if util::array::allin(\@atoms,$exclude_atoms); my $rel = $ass->{seq_pos} - $root_seq_pos; next if $rel < -1; next if $rel > 0; next if ($root_ass->{ass_type} ne $ass->{ass_type}); if ($root_ass->{ass_type} eq 'pseudoassigned') { #todo this should be checked in checkpeaks next if $root_ass->{residue_name} ne $ass->{residue_name}; } my $atom_name = ($rel == -1)? $atoms[0] . '-1' :$atoms[0]; next if !util::array::allin([$atom_name],\@mars_atoms_full); $PR{$root}{resonances}{$atom_name} = $frequency; } } } my %atoms; foreach my $pr (keys %PR) { delete $PR{$pr}{peaks}; delete $PR{$pr}{ass}; foreach my $atom (keys %{$PR{$pr}{resonances}}) { $atoms{$atom}++; } } my @atoms = sort keys %atoms; my $field_width = 8; printf '%8s', ''; foreach (@atoms) { printf "\%$field_width\s", $_ ; } print "\n"; foreach my $pr (keys %PR) { printf '%-8s', $pr; my $res = $PR{$pr}{resonances}; foreach my $atom (@atoms) { if (defined $res->{$atom}) { printf "\%$field_width\.3f", $res->{$atom}; } else { printf "\%$field_width\s", '-'; } } print "\n"; } } sub getlabel { my $ass = shift; return $ass->{residue_label} .'@'. $ass->{atom}; } sub peak_id #return stringified full description of peak { my $peak = shift; return "at " . join(':', @{$peak->{pos}}) . " assigned as " . ass_id($peak->{ass}), " in $peak->{spectrum} spectrum"; } sub ass_id { my $ass = shift; return join(':', map {$_->{residue_label} . $_->{atom}} @$ass); } sub translate_atoms #attempt to translate atom names #leave name unchanged if there is no translation { my ($input,$tr_table) = @_; my @output; foreach my $in (@$input) { push @output, $tr_table->{$in} || $in; } return @output; } sub searchpp { #search prefix - common beginning of the search path my $self = shift; my $argv = $self->{argv}; my $users = util::array::insure($argv->{-users}); my $pp = util::array::insure($argv->{-pp}); my $list = $argv->{-list}; my @users = keys %user_nmr_dir; if (defined $users) { @users = @$users; } my %ct; my %stat; foreach my $user (@users) { if (not defined $user_nmr_dir{$user}) { app::error("user nmr data directories not specified for $user ". "modify definition of \%user_nmr_dir hash table in the code\n"); } my $path = util::array::insure($user_nmr_dir{$user}); my @files; print "inspecting $user\'s data... "; foreach my $p (@$path) { if ($p eq '') { $p = "~$user/data/$user/nmr/*/*/pulseprogram"; } else { $p = $p . '/*/*/pulseprogram'; } my @new_files = glob $p; push @files, @new_files; } print scalar(@files), " experiments found\n"; my @upp; foreach my $file (@files) { open F, "<$file" or app::die("cant read $file\: $!"); my $line = ; my $p = getpp($line); if (defined $pp) { my $regex = join('|',@$pp); next if $p !~ /$regex/i; } push @upp, $p; push @{$stat{$user}{$p}},$file; } foreach my $pp (@upp) { # $pp = getpp($pp); $ct{$pp}{count}++; $ct{$pp}{users} = [] if not defined $ct{$pp}{users}; addifnew($ct{$pp}{users}, $user); } } my @spp = sort { $ct{$b}{count}<=>$ct{$a}{count} } keys %ct; print "COUNT PULSEPROGRAM USERS\n"; foreach my $pp (@spp) { printf "%3d %-20s ", $ct{$pp}{count}, $pp; print join(', ', @{$ct{$pp}{users}}), "\n"; next if $list eq 'true'; foreach my $user (@{$ct{$pp}{users}}) { my $files = $stat{$user}{$pp}; if (defined $files) { print "\n"; foreach my $file (@$files) { print " "; $file =~ s/pulseprogram//; print $file; print "\n"; } print "\n"; } } } print "\n"; sub getpp { my $in = shift; my @bits = split /\//, $in; $bits[$#bits] =~ s/["\s]//g; return $bits[$#bits]; } sub addifnew { my ($ar, $what) = @_; foreach (@$ar) { return if $_ eq $what; } push @$ar, $what; } } sub funhelp { my ($this, $msg) = @_; if (exists $this->{argv}{-help}) { print $msg, "\n"; exit; } } sub idexp { #todo add asking for a comment in the interactive regime #may want to use footprint my $self = shift; my %argv = %{$self->{argv}}; my $file = 'pulseprogram'; #hash reference to all experiment internals my $EXP = initExp(); if (defined $argv{-dir}) { if ($argv{-dir} !~ /\/$/) { $argv{-dir} .= '/'; } $file = $argv{-dir} . $file; } if (not -f $file) { app::error("no $file file found in './' ; try -dir option or use cd."); } push @FILES, glob($file); open F, "<$file" or die "\n\t$0:\n\tcan't open $file for reading: $!"; #open F, "cat /auto_nfs/data2/users/rclubb/data/*/*/*/[1-9]/pulseprogram | " # or die "grep doesn't work: $!"; #print $ARGV[0], "\n"; my @prog; my @var; my @defines; my $line; while ($line = ) { chomp $line; $line =~ s/^\s+//; # remove leading space $line =~ s/\s+$//; # remove trailing space PHASE: if ($line =~ /^ph[0-9]+/) { LONGPHASE: while ($line = ) { $line =~ s/^\s+//; # remove leading space $line =~ s/\s+$//; # remove trailing space chomp $line; next LONGPHASE if $line =~ /^\s*$/;# skip empty lines if ($line =~ /^[\s\d]*$/) { next LONGPHASE; } goto PHASE if $line =~ /^ph[0-9]+/; last LONGPHASE; } } next if $line =~ /^#/; # don't follow dependencies for now next if $line =~ /^\s*;/; # get rid of comments next if $line =~ /^\s*$/; # skip empty lines #there are also define statements if ($line =~ /^\"/) { push @var, $line; } elsif ($line =~ /^define/) { push @defines, $line; } else { push @prog, $line; } } # remove trailing comments and spaces s/([^\s])\s*;.*$/\1/ foreach (@prog); s/([^\s])\s*;.*$/\1/ foreach (@var); s/([^\s])\s*;.*$/\1/ foreach (@var); #remove labels s/^\d+\s+(.*)$/\1/ foreach (@prog); my %var; my @crap; #collect items foreach (@prog) { my @words = split /\s+|:|\*/; foreach (@words) { next if /^\s*$/; s/[()]//g; push @crap, $_; if (/^(f\d+)$/) { $var{$1}{count}++; $var{$1}{type}='channel' } elsif (/^(p[\d]+)$/) { $var{$1}{count}++; $var{$1}{type}='pulse' } elsif (/^(d\d+)/) { $var{$1}{count}++; $var{$1}{type}='delay' } elsif (/^(gp\d+)$/) { $var{$1}{count}++; $var{$1}{type}='gradpulse' } elsif (/(cpd\d+)|(cpds\d+)/) { $var{$1}{count}++; $var{$1}{type}='decoupling' } elsif (/^(pl\d+)$/) { $var{$1}{count}++; $var{$1}{type}='power' } elsif (/^(sp\d+)$/) { $var{$1}{count}++; $var{$1}{type}='shapedpulse' } else { #print "unknown token: $_\n"; } } } foreach (@defines) { last;#ignore defines for now if (/^define\s+pulse\s(.*)/) { $var{$1}{count}++; $var{$1}{type} = 'pulse'; } elsif (/define\s+delay\s(.*)/) { $var{$1}{count}++; $var{$1}{type} = 'delay'; } elsif (/define\s+gradient\s(.*)/) { $var{$1}{count}++; $var{$1}{type} = 'gradpulse'; } else { #system "~/fadeev/bin/note todo $0: unsupported define type: $_"; } } foreach (@var) { s/^"(.*)=.*$/\1/; s/\s//g; $var{$_}{count} = 0 if defined $var{$_}; } my @undefined; foreach (keys %var) { die "oops" if not defined $var{$_}; push @undefined, $_ if $var{$_}{count} != 0; } if (defined $argv{'-val'}) { push @FILES, glob("$argv{-dir}acqus"); open F, "<$argv{-dir}acqus" or die "can't open file $argv{-dir}acqus:$!"; my @ln = ; $var{date} = $ln[5]; my @bits = split /\s+/, $var{date}; shift @bits;pop @bits;pop @bits; pop @bits; $var{date} = join(' ',@bits); $var{location} = $ln[6]; $var{location} =~ s/\$\$ //; $var{location} =~ s/acqus$//; foreach (@undefined) { /^p(\d+)/ && do { setaval(\%var, \@ln,$_,'P',$1)}; /^sp(\d+)/ && do { setSval(\%var, \@ln,$_,"SPNAM$1")}; /^d(\d+)/ && do { setaval(\%var, \@ln,$_,'D',$1)}; /^pl(\d+)/ && do { setaval(\%var, \@ln,$_,'PL',$1)}; /^gp(\d+)/ && do { setSval(\%var, \@ln,$_,"GPNAM$1")}; } my @pulses; my @shapedpulses; my @delays; my @powerlevels; foreach (@undefined) { /^p(\d+)/ && do { push @pulses, $_ }; /^sp(\d+)/ && do { push @shapedpulses, $_ }; /^d(\d+)/ && do { push @delays, $_ }; /^pl(\d+)/ && do { push @powerlevels, $_ }; } @pulses = sort {&ord($a,$b)} @pulses; @shapedpulses = sort {&ord($a,$b)} @shapedpulses; @delays = sort {&ord($a,$b)} @delays; @powerlevels = sort {&ord($a,$b)} @powerlevels; my $pulseprogram = getline($argv{-dir}.'pulseprogram',1); my $dir = getline($argv{-dir}.'acqus',7); my $host = getline($argv{-dir}.'acqus',6); $pulseprogram =~ s/^.*\"(.*)\".*$/$1/; $host =~ s/^.*\@([^.\ :]+).*$/$1/; $dir =~ s/^\$\$ (.*)acqus$/$1/; chomp $dir; chomp $pulseprogram; chomp $host; use Cwd; my $title; if (defined $argv{-title}) { $title = join(' ',@{$argv{-title}}); $title =~ tr/[a-z]/[A-Z]/; } if (defined $EXP->{getpp($pulseprogram)}) { my $dat = $EXP->{getpp($pulseprogram)}; my $title = $title || $dat->{title}; } if (defined $title) { print "\n\n ", $title, "\n\n\n\n"; } else { print "\n\n\n\n"; } print "DATE $var{date}\n\n"; print "DATA $host:$dir\n\n"; print "PROC ", getcwd(), "\n\n"; print "PULPROG $pulseprogram\n\n"; my $comment = $argv{-comment}; if (defined $comment) { if (util::array::isa($comment)) { $comment = join(' ',@$comment); } $comment = "COMMENT $comment"; print util::line::wrap(-line=>$comment,-indent=>13,-firstword=>'COMMENT'); print "\n\n"; } printDimPar(); printNumbers(' PULSES, us',\@pulses); printNumbers('POWER LEVELS, dB',\@powerlevels); printNumbers(' DELAYS, s',\@delays); printNames(' SHAPED PULSES',\@shapedpulses); } else { foreach (@undefined) { print "$_ -> $var{$_}{value}\n"; } } sub getpp { my $in = shift; my @bits = split /\//, $in; $bits[$#bits] =~ s/["\s]//g; return $bits[$#bits]; } sub printNumbers { my ($title,$ar) = @_; my $i = 0; print " $title\n\n" if @$ar; foreach (@$ar) { $i++; my $val = $var{$_}{value}; if (abs($val) > 10000 || abs($val) < 0.5) { printf "%-5s%10.3e ", $_, $val;} else { printf "%-5s%10.3f ", $_, $val;} print "\n\n" if not $i % 4; } print "\n\n" if $i % 4; } sub printNames { my ($title,$ar) = @_; my $i = 0; print " $title\n\n" if @$ar; foreach (@$ar) { $i++; $var{$_}{value} =~ s/<(.*)>/\1/; printf "%-5s%10s ", $_, $var{$_}{value}; print "\n\n" if not $i % 4; } print "\n\n" if $i % 4; } sub ord { my ($a,$b) = @_; $a =~ /[^\d]+(\d+)$/; $a = $1; $b =~ /[^\d]+(\d+)$/; return $a<=>$1; } sub setSval { my ($var,$ln,$key,$nm) = @_; my $i; for ($i=0; $i<@$ln;$i++) { last if $ln->[$i] =~ /^##\$$nm/; } $ln->[$i] =~ /\s([^\s]+)$/; $var->{$key}{value} = $1; return; } sub setaval { my ($var,$ln,$key,$nm,$num) = @_; my $i; for ($i=0; $i<@$ln;$i++) { last if $ln->[$i] =~ /^##\$$nm/; } my @bits; for(;;) { $i++; chomp $ln->[$i]; $ln->[$i] =~ s/^\s+(.*)\s+$//; push @bits, split(/\s+/, $ln->[$i]); if (@bits > $num) { $var->{$key}{value} = $bits[$num]; return; } } } sub getline { my ($file, $lineno) = @_; push @FILES, glob($file); open F, "<$file" or die "$0 cannot open $file for reading: $!"; my @lines = ; return $lines[$lineno - 1]; } sub printDimPar { my $dim = 1; my ($temperature,$decim,$dspfvs,$ns); my @par; my $found = 0; my $instrument; #read parameters from acqfiles while (-f $argv{-dir}.acqfile($dim)) { $found = 1; die "more then 4D experiment?" if $dim > 4; my $lines = [util::readfile($argv{-dir}.acqfile($dim))]; if ($dim == 1) { $ns = findpar('NS', $lines); $decim = findpar('DECIM', $lines); $dspfvs = findpar('DSPFVS', $lines); $temperature = findpar('TE', $lines); } $par[$dim]{td} = findpar('TD', $lines); $par[$dim]{sw} = findpar('SW_h', $lines); $par[$dim]{freq} = findpar('BF1', $lines); $par[$dim]{o} = findpar('O1', $lines); $par[$dim]{sfo} = findpar('SFO1', $lines); $par[$dim]{offset} = $par[$dim]{o}/$par[$dim]{freq}; my $nucleus = findpar('NUC1', $lines); $nucleus =~ s/^<(.*)>$/$1/; $par[$dim]{nucleus} = $nucleus; $dim++; } #todo this is not working, needs to be fixed my $lines = [util::readfile($argv{-dir}.'acqu')]; $instrument = findpar('INSTRUM', $lines); $instrument =~ s/^[^\d]*([1-9]00)[^\d]$/$1/; if (not $found) { print "$0: no bruker files found in the current directory\n"; exit; } $dim--; #start printing the output print "--------------------------------------------------------------------------------\n\n"; printf "%-25s %10d\n\n", "SCANS", $ns; printf "%-25s %10d%s\n\n", "TEMPERATURE", $temperature, "K"; my @dimname = qw(unused 1 2 3 4); my $head = "%-25s "; my $format1 = "%10d "; my $format2 = "%10s "; my $format3 = "%10.5f "; my $format4= $format2 . $format2; printf $head, "NUM. POINTS (TOTAL)"; for (my $i = 1; $i <= $dim; $i++) { my $td = $par[$i]{td}; die "fractional TD parameter $td for $dim dimension" if (int($td) - $td) != 0; $par[$i]{td}-- if int($td/2)*2 != $td; printf $format1, $par[$i]{td}; }; print "\n\n"; printf $head, "SPEC. WIDTH, Hz"; for (my $i = 1; $i <= $dim; $i++) { printf $format3, $par[$i]{sw}}; print "\n\n"; printf $head, "FREQUENCY(BF1-3), MHz"; for (my $i = 1; $i <= $dim; $i++) { printf $format3, $par[$i]{freq}}; print "\n\n"; printf $head, "CARRIER OFFSET, ppm"; for (my $i = 1; $i <= $dim; $i++) { printf $format3, $par[$i]{offset}}; print "\n\n"; printf $head, "CARRIER OFFSET(O1-O3), Hz"; for (my $i = 1; $i <= $dim; $i++) { printf $format3, $par[$i]{o}}; print "\n\n"; printf $head, "NUCLEUS"; for (my $i = 1; $i <= $dim; $i++) { printf $format2, $par[$i]{nucleus}}; print "\n\n"; print "--------------------------------------------------------------------------------\n\n"; } sub findpar { my ($name, $lines) = @_; my @par = grep /^##\$$name=/, @$lines; $par[0] =~ s/^##\$$name=(.*)$/$1/; $par[0] =~ s/^\s*(.*)$/$1/; chomp $par[0]; return $par[0]; } sub acqfile { my $dim = shift; return 'acqus' if $dim == 1; return 'acqu' . $dim . 's'; } sub fm #frequency match { my ($actual,$target) = @_; return 1 if (abs($target - $actual) < 0.01); return 0; } } sub conv { my $self = shift; my $dim = 1; my ($temperature,$decim,$dspfvs,$ns); my @par; my $found = 0; my %argv = %{$self->{argv}}; my $instrument = $self->{argv}{-mhz}; #and any permutation of four digits 1-4 for the 4D data #read parameters from acqfiles #number of dimensions must be determined from the pulse program #and not from acqu file number todo fix it later $argv{-dir} .= '/' if ($argv{-dir} !~ /\/$/); while (-f $argv{-dir} . acqfile($dim)) { $found = 1; die "more then 3D experiment?" if $dim > 3; my $lines = [util::readfile($argv{-dir} . acqfile($dim))]; if ($dim == 1) { $ns = findpar('NS', $lines); $decim = findpar('DECIM', $lines); $dspfvs = findpar('DSPFVS', $lines); $temperature = findpar('TE', $lines); } $par[$dim]{td} = findpar('TD', $lines); $par[$dim]{sw} = findpar('SW_h', $lines); $par[$dim]{freq} = findpar('BF1', $lines); $par[$dim]{o} = findpar('O1', $lines); $par[$dim]{sfo} = findpar('SFO1', $lines); $par[$dim]{offset} = $par[$dim]{o}/$par[$dim]{freq}; my $nucleus = findpar('NUC1', $lines); $nucleus =~ s/^<(.*)>$/$1/; $par[$dim]{nucleus} = $nucleus; $dim++; } $dim--; #determine instrument proton frequency as a maximum frequency if (not defined $instrument) { my @frq; for (my $i=1;$i<=$dim;$i++) { push @frq, $par[$i]{freq}; } $instrument = util::array::max(\@frq); } # this clause will automatically calibrate reference ppm offsets # in all dimensions if ($argv{-noref} eq 'false') { # determine nuclei based on instrument type # and take a note of proton dimension for (my $i = 1; $i <= $dim; $i++) { my $r = $par[$i]{freq}/$instrument; if ( fm($r,1 )) { $par[$i]{label} = '1H'; } elsif ( fm(0.1,$r) ) { $par[$i]{label} = '15N'; } elsif ( fm(0.25,$r) ) { $par[$i]{label} = '13C'; } else { $par[$i]{label} = undef; } } # die if experiment is not proton detected if ($par[1]{label} ne '1H') { app::message( "Please rerun this with -noref flag. ". "Automatic referencing based on 1H shift failed since ". "the carrier frequency in the direct ". "dimension is smaller then ". "the carrier frequency in one of the indirect ". "dimensions and it seems that ". "this experiment might not be proton detected."); exit 1; } my $BF; if (defined $argv{'-ref'}) { my $ppmref = $argv{'-ref'}; util::assert( sub{ util::scalar::isnum($ppmref) and $ppmref > 3.8 and $ppmref < 5.2; }, "value of -ref is expected as a number > ". "3.8 and < 5.2 (ppm for HDO chemical shift)" ); $par[1]{offset} = $ppmref; # determine true frequency corresponding to # zero chemical shift in proton dimension $BF = 0.000001*((1000000-$ppmref)*$par[1]{freq} + $par[1]{o}); } else { $BF = $par[1]{freq}; } #my %rat = ('15N'=>0.101329118,'13C'=>0.251449530,'1H'=>1); # figure out automated referencing #for (my $i = 2; $i <= $dim; $i++) #{ #my $label = $par[$i]{label}; # util::assert(sub{defined $label},"Please rerun this with -noref flag ". #"because type of nucleus could not be detemined ". #"for dimension $i"); # my $bf = $BF*$rat{$label}; # $par[$i]{o} = $par[$i]{o} + 1000000*($par[$i]{freq} - $bf); # $par[$i]{offset} = $par[$i]{o}/$bf; #} } if (not $found) { print "$0: no bruker files found in the current directory\n"; exit; } my $pulseprogram = getline($argv{-dir}.'pulseprogram',1); my $dir = getline($argv{-dir}.'acqus',7); my $host = getline($argv{-dir}.'acqus',6); $pulseprogram =~ s/^.*\"(.*)\".*$/$1/; $host =~ s/^.*\@([^.\ :]+).*$/$1/; $dir =~ s/^\$\$ (.*)acqus$/$1/; chomp $dir; chomp $pulseprogram; chomp $host; use Cwd; my @info; my $EXP = initExp(); use File::Basename; my $ppname = basename($pulseprogram); my $expname; if (defined $EXP->{$ppname}) { push @info, "#experiment $EXP->{$ppname}{title}\n#\n"; $expname = $EXP->{$ppname}{title}; } push @info, "#number of scans $ns\n"; push @info, "#temperature $temperature\K\n#\n"; push @info, "#data $host:$dir\n"; push @info, "#processing ", getcwd(), "\n"; push @info, "#pulseprogram $pulseprogram\n"; push @info, "#\$Id\$\n"; my $swap = '-swap'; if (defined $argv{-axisorder}) { my $ord = $argv{-axisorder}; my @msg = ( undef, undef, 'allowed values are 12 and 21', 'allowed values are 123, 321, 132, etc.', 'allowed values are 1234, 4312, 2413, atc.'); if (bad_ord($ord,$dim)) { app::error("bad axis order parameter: $ord.\n$msg[$dim]\n"); } #now swap axes here my @new_par; my @ax = split(/\B/,$ord); my $i = 1; foreach my $ax (@ax) { $new_par[$i++] = $par[$ax]; } @par = @new_par; } sub bad_ord { my ($ord,$dim) = @_; my @ax = split /\B/, $ord; return 1 if @ax > 4 or $dim != @ax; my %ax; foreach my $ax (@ax) { $ax{$ax}++; } my @val = values %ax; my $prod = 1; foreach (@val){ $prod *= $_ ;} return 1 if $prod != 1; @ax = sort {$a <=> $b} @ax; return 1 if shift(@ax) != 1; return 1 if pop(@ax) != $dim; return 0; } #start printing the output if ($argv{'-brief'} eq 'true') { $expname = (defined $expname)?$expname:$ppname; $expname =~ s/$ENDOFLINE//g; $expname =~ s/^(.{1,26}).*/\1/; printf "%-27s", $expname; for (my $i=1; $i<=$dim; $i++) { my $sw_ppm = sprintf '%.3f', $par[$i]{sw}/($par[$i]{freq}); my $offset = sprintf '%.3f', $par[$i]{offset}; my $nucleus = $par[$i]{nucleus}; print "$nucleus($offset/$sw_ppm) "; } print "\n"; } else { # here I print regular convert.com output print "#!/bin/sh -f\n"; print "#\n"; #print "#spectrometer $instrument\MHz\n"; print @info; print "#\n"; #print "#\$Author\$\n#\$Id\$\n"; print "#\n"; print "bruk2pipe -in ser -DMX $swap -decim $decim -dspfvs $dspfvs \\\n"; my @dimname = qw(unused x y z a); $par[0]{mode}='unused'; $par[1]{mode}='DQD'; $par[2]{mode}='Complex'; $par[3]{mode}='Complex'; $par[4]{mode}='Complex'; my $format1 = "%-6s %14d "; my $format2 = "%-6s %14s "; my $format3 = "%-6s %14.3f "; my $format4= $format2 . $format2; for (my $i = 1; $i <= $dim; $i++) { my $td = $par[$i]{td}; die "fractional TD parameter $td for $dim dimension" if (int($td) - $td) != 0; $par[$i]{td}-- if int($td/2)*2 != $td; printf $format1, "\-$dimname[$i]N", $par[$i]{td}; }; print "\\\n"; for (my $i = 1; $i <= $dim; $i++) { printf $format1, "\-$dimname[$i]T", $par[$i]{td}/2 }; print "\\\n"; for (my $i = 1; $i <= $dim; $i++) { printf $format2, "\-$dimname[$i]\MODE", "$par[$i]{mode}"}; print "\\\n"; for (my $i = 1; $i <= $dim; $i++) { printf $format3, "\-$dimname[$i]\SW", $par[$i]{sw}}; print "\\\n"; for (my $i = 1; $i <= $dim; $i++) { printf $format3, "\-$dimname[$i]\OBS", $par[$i]{freq}}; print "\\\n"; for (my $i = 1; $i <= $dim; $i++) { printf $format3, "\-$dimname[$i]\CAR", $par[$i]{offset}}; print "\\\n"; for (my $i = 1; $i <= $dim; $i++) { printf $format2, "\-$dimname[$i]LAB", $par[$i]{nucleus}}; print "\\\n"; printf $format4, "-ndim", $dim, "-aq2D", "States"; print " \\\n"; if ($dim == 2) { print "-out ft/data.fid -ov -verb\n"; } else { print "-out ft/data\%03d.fid -ov -verb\n"; } } } sub getline { my ($file, $lineno) = @_; push @FILES, glob($file); open F, "<$file" or die "$0 cannot open $file for reading: $!"; my @lines = ; return $lines[$lineno - 1]; } sub getacqpar { my $self = shift; my $argv = $self->{argv}; my $par = $argv->{-default}; my @lines = util::readfile('./acqus'); my $result = bruker::findpar($par,\@lines); if (not defined $result) { app::message("value not found"); exit 0; } my $type = $result->{'type'}; my $value = $result->{'value'}; if ($type eq 'array') { my $max = scalar(@$value); print "\n" if ($max > 0); for (my $i=0;$i<$max;$i++) { my $ind = $i+1; print "\t$par$ind = $value->[$ind]\n"; } print "\n" if ($max > 0); } elsif ($type eq 'scalar') { $value =~ tr/<>/'/; print "\n\t$par = $value\n\n"; } else { die "internal error: unknown type $type returned by bruker::findpar()\n"; } } sub acqfile { my $dim = shift; return 'acqus' if $dim == 1; return 'acqu' . $dim . 's'; } sub fm #frequency match { my ($actual,$target) = @_; return 1 if (abs($target - $actual) < 0.01); return 0; } sub searchp { my %argv = %{shift()}; my $tol = $argv{'-tol'} || 0.01; my $res = $argv{'-res'} || die $searchp_msg; my @files = $argv{'-files'} || glob './*.save'; my @matches; print "\nspectrum peaks\n\n"; foreach my $file (@files) { my @lines = util::readfile($file); # for now only read one ... clause shift @lines while $lines[0] !~ //; shift @lines;# tag line pop @lines while $lines[$#lines] !~ //; pop @lines; # now only read ONE clause!!! die "unsupported format of sparky .save file" if grep(//,@lines); use File::Basename; $file = basename($file); $file =~ s/^([^\.]+)\..*$/\1/; my %peaks; while (@lines) { shift @lines while @lines && $lines[0] !~ /type peak/; last if !@lines; shift @lines; my @peaklines; push(@peaklines, shift @lines) while @lines && $lines[0] !~ /type peak/; while (@peaklines) { shift @peaklines while @peaklines && $peaklines[0] !~ /^id/; last if !@peaklines; $peaklines[0] =~ /^id\s+(\d+)$/; my $id = $1; shift @peaklines while @peaklines && $peaklines[0] !~ /^pos/; last if !@peaklines; my $line = $peaklines[0]; chomp $line; $line =~ s/^pos\s+(.*)$/\1/; $line =~ s/([^\s])\s+$/\1/; my @bits = split /\s+/, $line; if (is_near($res,\@bits,$tol))# todo look here i have internal::peak::near { printf "%-10s", $file; printf("%9.3f", $_) foreach (@bits); shift @peaklines while @peaklines && $peaklines[0] !~ /^rs/; if (@peaklines) { $peaklines[0] =~ /^rs\s+(.*)$/; my $ass = $1; $ass =~ s/^\s*\|(.*)\|\s*$/\1/; my @ass = split /\|\s+\|/, $ass; foreach my $ass (@ass) { my ($res,$atom) = split /\|/, $ass; printf "%3s%1s%-6s", $atom, '@', $res; #$ass =~ s/\|/\@/; #printf "%-15s", $ass; } } print "\n"; } } } } print "\n"; } sub is_near { my ($res, $other, $tol) = @_; foreach my $oneof (@$other) { return 1 if abs($res-$oneof) <= $tol; } return 0; } sub preg { my %argv = %{shift()}; my $file = $argv{'-shifts'} || die $shift_msg; push @FILES, glob($file); open F, "<$file" or die "cant open $file:$!"; my @lines = ; my %res; foreach my $line (@lines) { $line =~ s/^\s+(.*)/\1/; chomp $line; my @bits = split /\s+/, $line; next if $bits[0] !~ /^([a-z]+\d+)([-+]?\d+)?$/; $res{$1} = {} if not defined $res{$1}; $res{$1}{$bits[0]}++; } foreach my $key (sort keys %res) { printf "%-8s%30s\n", "$key:", join(',', sort(keys %{$res{$key}})); } } sub findres { my $self = shift; my %argv = %{$self->{argv}}; my $ppm = $argv{'-res'}; my $tol = $argv{'-tol'}; my $file = $argv{'-shifts'}; my $fmt = $argv{'-fmt'}; my $nom = $argv{'-nom'}; app::error("dont understand -res argument value $ppm") if $ppm !~ /^[0-9]+(\.[0-9]+)?$/; my $ass = nmr::readass($file,$fmt,$nom); my @res; foreach my $res (@$ass) { my $freq = $res->{'shift'}; push(@res, $res) if abs($freq - $ppm) <= $tol; } foreach my $res (@res) { printf "%s %s %7.3f\n", $res->{residue}, $res->{atom}, $res->{'shift'}; } } sub calibrate_shifts { my $self = shift; my %argv = %{$self->{argv}}; if (defined $argv{'-help'}) { my $prog = $self->{program}; $self->message("$prog -shifts . Calibrate ". "spectral reference point and spectral width using ". "linear regression. ". "shift table must be a text file with two columns ". "with the chemical shifts of uncalibrated spectrum ". "in the first column and those of the reference ". "spectrum in the second column. ". "lines beginning with # are ignored"); exit; } my $reg = Statistics::Regression->new(2, 'fitting of sw & ref', ['b','a']); my $file = $argv{-shifts} || app::error("-shifts parameter required"); push @FILES, glob($file); open F, "<$file" or die "can't open $file: $!"; my @lines = ; my @data; my @delta; foreach my $line (@lines) { chomp $line; $line =~ s/^\s+//; next if $line =~ /^#/; my @bits = split /\s+/, $line; $reg->include($bits[1],[$bits[0],1],1); #$reg->include($bits[1]-$bits[0],[1,1],1); push @data, \@bits; push @delta, ($bits[0]-$bits[1]); } my $delta; $delta += $_ foreach @delta; $delta /= scalar(@delta); if (not defined $argv{-sw}) { #todo maybe make this into separate function doing ttest #or maybe get it off the web print "\nPrinting out deviations point-by-point and goodness of fit.\n"; printf "\n%8s%8s%9s%9s\n\n", 'new data', 'target', 'before', 'after'; my $var_b=0;#variance before fit my $n=0; my $var_a=0;#variance after fit #foreach my $data (sort {$a->[0]<=>$b->[1]} @data) foreach my $data (@data) { my $x = $data->[0]; my $y = $data->[1]; my $expected = $x - $delta; printf "%8.3f%8.3f%9.4f%9.4f\n",$x,$y,$y-$x,$y-$expected; $var_b += ($x-$y)**2; $var_a += ($expected-$y)**2; $n++; } $var_b = sqrt($var_b/$n); $var_a = sqrt($var_a/$n); print "\nData variance.\n"; printf "before: %8.6f\n", $var_b; printf "after fitting: %8.6f\n\n", $var_a; print "Set REF at: ", $argv{'-ref'} - $delta, "\n"; print "\n"; } else { my ($slope, $const) = $reg->theta; my $rsq = $reg->rsq; my $sw = $argv{-sw} || die "-sw parameter required"; my $ref = $argv{'-ref'} || die "-ref parameter required"; #$reg->print; #if ($rsq < 0.9999) { #todo maybe make this into separate function doing ttest #or maybe get it off the web print "\nPrinting out deviations point-by-point and goodness of fit.\n"; printf "\n%8s%8s%9s%9s\n\n", 'new data', 'target', 'before', 'after'; my $var_b=0;#variance before fit my $n=0; my $var_a=0;#variance after fit #foreach my $data (sort {$a->[0]<=>$b->[1]} @data) foreach my $data (@data) { my $x = $data->[0]; my $y = $data->[1]; my $expected = $slope*$x + $const; printf "%8.3f%8.3f%9.4f%9.4f\n",$x,$y,$y-$x,$y-$expected; $var_b += ($x-$y)**2; $var_a += ($expected-$y)**2; $n++; } $var_b = sqrt($var_b/$n); $var_a = sqrt($var_a/$n); printf "\nVariance before fitting: %8.6f\n", $var_b; printf " Variance after fitting: %8.6f\n", $var_a; printf " R^2: %8.6f\n", $rsq; print "\nSet new conversion parameters at:\n"; printf "SW\t%-10.3f\n", $sw*$slope; printf "REF\t%-5.3f\n", $ref*$slope + $const; print "\n"; } } } sub importpeaks { my $self = shift; my $infiles = $self->{argv}{-peaks}; my $out = $self->{argv}{-dat}; my $format = $self->{argv}{-fmt}; # todo implement reading of the peak assignments #my $nom = $self->{argv}{-nom}; util::assert(sub{not ref $out},'parameter -dat not understood '. 'only one project file allowed'); util::assert(sub{not ref $infiles},'parameter -peaks not understood '. 'only one peaklist can be loaded at a time'); my $SP; if (-f $out) { $SP = internal::spectrum::read($out); } else { $SP = internal::spectrum::new(); } # todo this is a plug my @files = ($infiles); foreach my $file (@files) { my $peaks; if ($format eq 'sparky') { $peaks = sparky::readpeaks($file); } elsif ($format eq 'pipp') { $peaks = pipp::readpeaks($file); } if ($file =~ /\./) { my @bits = split /\./, $file; pop @bits; $file = join('.',@bits); } $file = util::filebasename($file); internal::spectrum::setpeaks($SP,$file,$peaks); } # save peaks into an xml dump file util::savedata($SP,$out,'force'); internal::spectrum::printinfo($SP); } # PACKAGES package email; sub send { my %arg = @_; my $addr = $arg{address}; my $subj = '"'.$arg{subject}.'"'; my $mess = $arg{body}; open F, '| mail -s ' . $subj ." $addr"; print F $mess; close F; } package Statistics::Regression; my $VERSION = '0.15'; use strict; ################################################################ use constant TINY => 1e-8; ################################################################ =head1 NAME Regression.pm - weighted linear regression package (line+plane fitting) =head1 SYNOPSIS use Statistics::Regression; # Create regression object my $reg = Statistics::Regression->new( 3, "sample regression", [ "const", "someX", "someY" ] ); # Add data points $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] ); $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] ); $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] ); $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] ); # Print the result $reg->print(); # Prints the following: # **************************************************************** # Regression 'sample regression' # **************************************************************** # Theta[0='const']= 0.2950 # Theta[1='someX']= 0.6723 # Theta[2='someY']= 1.0688 # R^2= 0.808, N= 4 # **************************************************************** # Or, to get the values of the coefficients and R^2 my @theta = $reg->theta; my $rsq = $reg->rsq; =head1 DESCRIPTION Regression.pm is a multivariate linear regression package. That is, it estimates the c coefficients for a line-fit of the type y= c(0)*x(0) + c(1)*x1 + c(2)*x2 + ... + c(k)*xk given a data set of N observations, each with k independent x variables and one y variable. Naturally, N must be greater than k---and preferably considerably greater. Any reasonable undergraduate statistics book will explain what a regression is. Most of the time, the user will provide a constant ('1') as x(0) for each observation in order to allow the regression package to fit an intercept. =head1 ALGORITHM =head2 Original Algorithm (ALGOL-60): W. M. Gentleman, University of Waterloo, "Basic Description For Large, Sparse Or Weighted Linear Least Squares Problems (Algorithm AS 75)," Applied Statistics (1974) Vol 23; No. 3 =head2 INTERNALS R=Rbar is an upperright triangular matrix, kept in normalized form with implicit 1's on the diagonal. D is a diagonal scaling matrix. These correspond to "standard Regression usage" as X' X = R' D R A backsubsitution routine (in thetacov) allows to invert the R matrix (the inverse is upper-right triangular, too!). Call this matrix H, that is H=R^(-1). (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1) = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]' =head2 Remarks This algorithm is the statistical "standard." Insertion of a new observation can be done one obs at any time (WITH A WEIGHT!), and still only takes a low quadratic time. The storage space requirement is of quadratic order (in the indep variables). A practically infinite number of observations can easily be processed! =head1 METHODS =cut ################################################################ #### let's start with handling of missing data ("nan" or "NaN") my $nan= "NaN"; sub isNaN { if ($_[0] !~ /[0-9nan]/) { die "definitely not a number in NaN: '$_[0]'"; } return ($_[0]=~ /NaN/i) || ($_[0] != $_[0]); } ################################################################ =head2 new my $reg = Statistics::Regression->new($n, $name, \@var_names) Receives the number of variables on each observations (i.e., an integer) and returns the blessed data structure as a Statistics::Regression object. Also takes an optional name for this regression to remember, as well as a reference to a k*1 array of names for the X coefficients. =cut ################################################################ sub new { my $classname= shift(@_); my $K= shift(@_); # the number of variables my $regname= shift(@_) || "with no name"; if (!defined($K)) { die "Regression->new needs at least one argument for the number of variables"; } if ($K<=1) { die "Cannot run a regression without at least two variables."; } sub zerovec { my @rv; for (my $i=0; $i<=$_[0]; ++$i) { $rv[$i]=0; } return \@rv; } bless { k => $K, regname => $regname, xnames => shift(@_), # constantly updated n => 0, sse => 0, syy => 0, sy => 0, wghtn => 0, d => zerovec($K), thetabar => zerovec($K), rbarsize => ($K+1)*$K/2+1, rbar => zerovec(($K+1)*$K/2+1), # other constants neverabort => 0, # computed on demand theta => undef, sigmasq => undef, rsq => undef, adjrsq => undef }, $classname; } ################################################################ =head2 dump $reg->dump Used for debugging. =cut ################################################################ sub dump { my $this= $_[0]; print "****************************************************************\n"; print "Regression '$this->{regname}'\n"; print "****************************************************************\n"; sub print1val { no strict; print "$_[1]($_[2])=\t". ((defined($_[0]->{ $_[2] }) ? $_[0]->{ $_[2] } : "intentionally undef")); my $ref=$_[0]->{ $_[2] }; if (ref($ref) eq 'ARRAY') { my $arrayref= $ref; print " $#$arrayref+1 elements:\n"; if ($#$arrayref>30) { print "\t"; for(my $i=0; $i<$#$arrayref+1; ++$i) { print "$i='$arrayref->[$i]';"; } print "\n"; } else { for(my $i=0; $i<$#$arrayref+1; ++$i) { print "\t$i=\t'$arrayref->[$i]'\n"; } } } elsif (ref($ref) eq 'HASH') { my $hashref= $ref; print " ".scalar(keys(%$hashref))." elements\n"; while (my ($key, $val) = each(%$hashref)) { print "\t'$key'=>'$val';\n"; } } else { print " [was scalar]\n"; } } while (my ($key, $val) = each(%$this)) { $this->print1val($key, $key); } print "****************************************************************\n"; } ################################################################ =head2 print $reg->print prints the estimated coefficients, and R^2 and N. For an example see the SYNOPSIS. =cut ################################################################ sub print { my $this= $_[0]; print "****************************************************************\n"; print "Regression '$this->{regname}'\n"; print "****************************************************************\n"; my $theta= $this->theta(); for (my $i=0; $i< $this->k(); ++$i) { print "Theta[$i".(defined($this->{xnames}->[$i]) ? "='$this->{xnames}->[$i]'":"")."]= ".sprintf("%12.4f", $theta->[$i])."\n"; } print "R^2= ".sprintf("%.3f", $this->rsq()).", N= ".$this->n()."\n"; print "****************************************************************\n"; } ################################################################ =head2 include $n = $reg->include( $y, [ $x1, $x2, $x3 ... $xk ], $weight ); Add one new observation. The weight is optional. Note that inclusion with a weight of -1 can be used to delete an observation. Returns the number of observations so far included. =cut ################################################################ sub include { my $this = shift(); my $yelement= shift(); my $xrow= shift(); my $weight= shift() || 1.0; # omit observations with missing observations; if (!defined($yelement)) { die "Internal Error: yelement is undef"; } if (isNaN($yelement)) { return $this->{n}; } my @xcopy; for (my $i=1; $i<=$this->{k}; ++$i) { if (!defined($xrow->[$i-1])) { die "Internal Error: xrow [ $i-1 ] is undef"; } if (isNaN($xrow->[$i-1])) { return $this->{n}; } $xcopy[$i]= $xrow->[$i-1]; } $this->{syy}+= ($weight*($yelement*$yelement)); $this->{sy}+= ($weight*($yelement)); if ($weight>=0.0) { ++$this->{n}; } else { --$this->{n}; } $this->{wghtn}+= $weight; for (my $i=1; $i<=$this->{k};++$i) { if ($weight==0.0) { return $this->{n}; } if (abs($xcopy[$i])>(TINY)) { my $xi=$xcopy[$i]; my $di=$this->{d}->[$i]; my $dprimei=$di+$weight*($xi*$xi); my $cbar= $di/$dprimei; my $sbar= $weight*$xi/$dprimei; $weight*=($cbar); $this->{d}->[$i]=$dprimei; my $nextr=int( (($i-1)*( (2.0*$this->{k}-$i))/2.0+1) ); if (!($nextr<=$this->{rbarsize}) ) { die "Internal Error 2"; } my $xk; for (my $kc=$i+1;$kc<=$this->{k};++$kc) { $xk=$xcopy[$kc]; $xcopy[$kc]=$xk-$xi*$this->{rbar}->[$nextr]; $this->{rbar}->[$nextr]= $cbar * $this->{rbar}->[$nextr]+$sbar*$xk; ++$nextr; } $xk=$yelement; $yelement-= $xi*$this->{thetabar}->[$i]; $this->{thetabar}->[$i]= $cbar*$this->{thetabar}->[$i]+$sbar*$xk; } } $this->{sse}+=$weight*($yelement*$yelement); # indicate that Theta is garbage now $this->{theta}= undef; $this->{sigmasq}= undef; $this->{rsq}= undef; $this->{adjrsq}= undef; return $this->{n}; } ################################################################ =head2 theta $theta = $reg->theta @theta = $reg->theta Estimates and returns the vector of coefficients. In scalar context returns an array reference; in list context it returns the list of coefficients. =cut ################################################################ sub theta { my $this= shift(); if (defined($this->{theta})) { return wantarray ? @{$this->{theta}} : $this->{theta}; } if ($this->{n} < $this->{k}) { return; } for (my $i=($this->{k}); $i>=1; --$i) { $this->{theta}->[$i]= $this->{thetabar}->[$i]; my $nextr= int (($i-1)*((2.0*$this->{k}-$i))/2.0+1); if (!($nextr<=$this->{rbarsize})) { die "Internal Error 3"; } for (my $kc=$i+1;$kc<=$this->{k};++$kc) { $this->{theta}->[$i]-=($this->{rbar}->[$nextr]*$this->{theta}->[$kc]); ++$nextr; } } my $ref = $this->{theta}; shift(@$ref); # we are counting from 0 # if in a scalar context, otherwise please return the array directly wantarray ? @{$this->{theta}} : $this->{theta}; } ################################################################ =head2 rsq, adjrsq, sigmasq, ybar, sst, k, n $rsq = $reg->rsq; # etc... These methods provide common auxiliary information. rsq, adjrsq, sigmasq, sst, and ybar have not been checked but are likely correct. The results are stored for later usage, although this is somewhat unnecessary because the computation is so simple anyway. =cut ################################################################ sub rsq { my $this= shift(); return $this->{rsq}= 1.0- $this->{sse} / $this->sst(); } sub adjrsq { my $this= shift(); return $this->{adjrsq}= 1.0- (1.0- $this->rsq())*($this->{n}-1)/($this->{n} - $this->{k}); } sub sigmasq { my $this= shift(); return $this->{sigmasq}= ($this->{n}<=$this->{k}) ? "Inf" : ($this->{sse}/($this->{n} - $this->{k})); } sub ybar { my $this= shift(); return $this->{ybar}= $this->{sy}/$this->{wghtn}; } sub sst { my $this= shift(); return $this->{sst}= ($this->{syy} - $this->{wghtn}*($this->ybar())**2); } sub k { my $this= shift(); return $this->{k}; } sub n { my $this= shift(); return $this->{n}; } ################################################################ =head1 BUGS/PROBLEMS =over 4 =item Missing This package lacks routines to compute the standard errors of the coefficients. This requires access to a matrix inversion package, and I do not have one at my disposal. If you want to add one, please let me know. =item Perl Problem perl is unaware of IEEE number representations. This makes it a pain to test whether an observation contains any missing variables (coded as 'NaN' in Regression.pm). =back =for comment pod2html -noindex -title "perl weighted least squares regression package" Regression.pm > Regression.html =head1 VERSION 0.15 =head1 AUTHOR Naturally, Gentleman invented this algorithm. Adaptation by ivo welch. Alan Miller (alan@dmsmelb.mel.dms.CSIRO.AU) pointed out nicer ways to compute the R^2. Ivan Tubert-Brohman helped wrap the module as as standard CPAN distribution. =head1 LICENSE This module is released for free public use under a GPL license. (C) Ivo Welch, 2001,2004. =cut 1; #2345345325 vim: sw=2 sts=2 package toolkit; sub initExp { my %EXP = ( hncacogp3d=>{title=>'HNCACO'}, ji_HXNOE_rlx=>{title=>'HETERONUCLEAR 2D-NOESY', par=>[{name=>'mixing time', expr=>undef}]}, 'ji_noeChsqc3d.rc'=>{title=>'3D 13C-edited NOESY-HSQC', par=>[{name=>'mixing time', expr=>'d8'}]}, hcacogp3d=>{title=>'HCACO'}, ji_noeChsqc3d=>{title=>'3D 13C-edited NOESY-HSQC', par=>[{name=>'mixing time', expr=>'d8'}]}, ccconhgp3d=>{title=>'CCCONH', par=>[{name=>'mixing time', expr=>'d15'}]}, 'ji_hcchcosy3d.rc'=>{title=>'Constant-Time HCCH COSY'}, hccconhgp3d2=>{title=>'HCCCONH', par=>[{name=>'mixing time', expr=>'d15'} ]}, ji_hcchtocsy3d=>{ title=>'3D HC(C)H-TOCSY', par=>[{name=>'mixing time', expr=>'217.3 * p9 *l15'}], }, 'hncagp3d.2'=>{title=>'3D HNCA'}, 'hncagpwg3d'=>{title=>'3D HNCA-WATERGATE'}, 'hncogp3d.2'=>{title=>'3D HNCO'}, hncacbgpwg3d=>{title=>'3D HNCACB-WATERGATE'}, hsqcetgp=>{title=>'2D HSQC'}, 'cbcaconhgp3d.2'=>{title=>'3D CBCA(CO)NH'}, hncacbgpwg3d=>{title=>'3D HNCACB-WATERGATE'}, ji_hnhbDecC3d=>{title=>'3D HNHB'}, 'roesyhsqcfpf3gpsi3d.rcB'=>{title=>'3D ROESY-HSQC', par=>{'name'=>'mixing time',expr=>"d26*2"}}, 'tocsyhsqc3d.rc'=>{title=>'3D TOCSY-HSQC', par=>[{name=>'mixing time', expr=>'p6 * 115.112 * l11'}] }, 'noe11ezg.jw'=>{title=>'2D 11-ECHO NOESY', par=>[{name=>'mixing time', expr=>'d8'}]}, 'ji_hnha3d'=>{title=>'3D HNHA'}, 'HSQCFPSI.rc'=>{title=>'2D HSQC with water flip-back'}, 'noesyhsqcfpf3gpsi3d.rc'=>{title=>"3D NOESY-HSQC, sensitivity enhanced,\n\t\t\t\t\twith water flipback", par=>{name=>'mixing time', expr=>"d8"}}, 'zgpr'=>{title=>'Presat-1D'}, '11echozg.jw'=>{title=>'1D 11-Echo with gradient H2O suppression'}, 'SD_noesypr'=>{title=>'Bruker 2D 11-Echo NOESY', par=>[{name=>'mixing time', expr=>'d9'}]}, 'hsqcwfb_C.rc'=>{title=>'HSQC with water flip-back'}, 'noesy_1D_CRYO.rc'=>{title=>'2D WATERGATE NOESY with 13C/15N decoupling'}, 'zg'=>{title=>'Basic 1D'}, 'invietf3gpsi'=>{title=>'2D HSQC'}, 'hsqcwfb.ui'=>{title=>'2D HSQC with water flip-back'}, 'hsqcfb_C.rc'=>{title=>'2D HSQC with water flip-back'}, 'hsqcetfpf3gpsi'=>{title=>'2D HSQC with sensitivity enhancement'}, 'ji_hsqcwfbDecC'=>{title=>'2D HSQC, water flip-back, 13C decoupling'}, 'invietgpsi'=>{title=>'2D HSQC with sensitivity enhancement'}, 'p3919gp'=>{title=>'1D 3-9-19 watergate H2O suppression'}, 'hsqcetf3gp'=>{title=>'2D HSQC'}, 'ji_WstF12fnoeIII.rc'=>{title=>'2D NOESY, F1&F2 13C-filtered'}, 'hsqcwfb.kj'=>{title=>'2D HSQC with water flip-back'}, 'invietf3gpsi.RC'=>{title=>'2D HSQC, with 13C decoupling, sensitivity enchancement'}, 'hsqcetf3gpsi'=>{title=>'2D HSQC, with sensitivity enchancement'}, 'SD_11echo1Ddc'=>{title=>'1D with 1-1 echo water suppression'}, 'ji_noesy3919'=>{title=>'2D NOESY with 3-9-19 watergate water suppression'}, 'hsqct2etf3gpsi'=>{title=>'2D HSQC for 15N T2 measurements'}, 'hsqcetgpsi'=>{title=>'2D HSQC'}, 'noesyhsqcetgp3d'=>{title=>'3D NOESY-HSQC'}, 'noesyhsqcfpf3gpsi3d'=>{title=>'3D NOESY-HSQC with sensitivity enchancement'}, 'ji_WstF12fnoe.ckl'=>{title=>'2D NOESY, F1&F2 13C-filtered'}, 'hsqcfpf3gpphwg'=>{title=>'2D HSQC, watergate'}, 'ji_WstF12fnoe_0405.ckl'=>{title=>'2D NOESY, F1&F2 13C-filtered'}, 'ji_hcchcosy3d'=>{title=>'3D constant time HCCH-COSY'}, 'cbcaconhgpwg3d'=>{title=>'3D CBCACONH, watergate'}, 'SD_cityprst'=>{title=>'2D ROESY with presat'}, 'hsqcwfb_C_DMSO.rc'=>{title=>'2D HSQC, water flip-back, for CRYO'}, 'hncogpwg3d'=>{title=>'3D HNCO, watergate'}, 'roesygpst19.2'=>{title=>'2D ROESY, 3-9-19 watergate'}, 'ji_hsqcCshape'=>{title=>'3D NOESY-HSQC'}, 'hsqcnoef3gpsi'=>{title=>'2D HETERONUCLEAR NOE'}, 'noesy_1D_CR_DMSO.rc'=>{title=>'2D NOESY, 13C/15N dec, watergate'}, 'ji_F1fNoeChsqc3d.rc'=>{title=>'3D F1-filtered F2-13C edited NOESY'}, '15nT2.ui'=>{title=>'2D HSQC for 15N T2 measurement (Junji)'}, 'roesyprst.2'=>{title=>'2D ROESY, presat'}, 'SD_1Dpresat'=>{title=>'1D with presat'}, 'hncacb3d.ui'=>{title=>'3D HNCACB (Junji)'}, 'hcchdigp3d.rc2'=>{title=>'3D HCCH-TOCSY (modified by Rob)'}, 'ji_tocsy3919'=>{title=>'2D TOCSY, 3-9-19 watergate'}, 'mlevhsqcetf3gp3d'=>{title=>'3D TOCSY-HSQC, sensitivity enchancement'}, 'tocsyhsqc3d.ma'=>{title=>'3D TOCSY-HSQC (Junji)'}, 'hbhaconhgpwg3d'=>{title=>'3D HBHACONH, watergate'}, 'noesygptp19'=>{title=>'2D NOESY, 3-9-19 watergate'}, '11echozg.my'=>{title=>'1D, 1-1 echo'}, 'tochsqc3d.ui'=>{title=>'3D TOCSY-HSQC (Junji)'}, 'ji_WstF12fnoe'=>{title=>'2D F2,F1-filtered NOESY, WURST (Junji)'}, 'ji_WstF2fnoe_0405.cklb'=>{title=>'2D F2 filtered NOESY (Junji,Chu)'} ); return \%EXP; } package bio; package bio::aminoacid; sub bio::aminoacid::atom::isamid { my $atoms = shift; return 0 if (scalar(@$atoms)==0); foreach my $atom (@$atoms) { return 0 if $atom->{atom} ne 'HN';#todo fix this!!! } return 1; } sub bio::aminoacid::atom::ismethyl { my $atoms = shift; if (scalar(@$atoms)==1) { my $atom = $atoms->[0]; my %nom = bio::protein::aminoacid::atom::nomenclature(); my ($residue,$junk) = bio::protein::sequence::idres($atom->{residue}); my $name = $atom->{atom}; my $all = $nom{$residue}; foreach my $try (@$all) { if ($try->{iupac} eq $name) { return 1 if $try->{ambig_regex} =~ /\[123\]$/; } } return 0; } else{ foreach my $atom (@$atoms) { return 0 if not bio::aminoacid::atom::ismethyl([$atom]); } return 1; } } sub bio::aminoacid::globatoms { #function's purpose is to allow globbing (obtaining a list of items based on #some symolic matching expression) atom names from nomenclature by aminoacid #type or from alternative atom table altnom_table #altnom_table is on residue by residue basis #not by residue type, probably that table is calculated #from structure models #if altnom_table is defined then list of available atoms for globbing #is not taken from bio::protein::aminoacid::atom::nomenclature but from #this custom table. the goal of introducing altnom_table is to be able #to look up atoms in molecule with nonstandard atom names my ($glob,$nom,$altnom_table) = @_; my $residue = $glob->{residue}; my $atom = $glob->{atom}; if (not defined $nom) { die "nomenclature must be defined for bio::aminoacid::globatoms"; } my $regex = xplor::wk2regex($atom); #atom list compilation step my @res_atoms; if (not defined $altnom_table) { my $aatype = $residue; $aatype =~ s/\d//g; my %nom = bio::protein::aminoacid::atom::nomenclature(); my $all_atoms = $nom{$aatype}; @res_atoms = map {$_->{$nom}} @$all_atoms; } else { @res_atoms = @{$altnom_table->{$residue}}; } my @group = grep /^$regex$/, @res_atoms; $residue =~ /^[A-Z]+(\d+)$/; my $resno = $1; for (my $i=0;$i$resno,residue=>$residue,atom=>$group[$i]}; } return \@group; } sub bio::aminoacid::atoms::getconnected { my $atom = shift; my $aa = $atom->{residue}; my $aa_atoms = bio::aminoacid::atoms($aa); my @conn; $aa =~ /^[A-Z]+(\d+)$/; my $resno = $1; foreach my $aa_atom (@$aa_atoms) { my $at = {atom=>$aa_atom,residue=>$aa,resno=>$resno}; if (bio::aminoacid::atoms::connected($atom,$at)) { push @conn, $at; } } if ($atom->{atom} =~ /^H/ and scalar(@conn) > 1) { die "found more then one atom connected to proton, ". "something wrong must be with the atom table"; } # CAVEAT only return one connected atom, # good enough for protons return $conn[0]; } sub bio::aminoacid::atoms::connected # return 1 if two atoms are connected # and 0 if they are not # one of atoms must be proton # also look at caveats commented here { # at1 and at2 are like resonance assignments my ($at1,$at2) = @_; my $atom1 = $at1->{atom}; my $atom2 = $at2->{atom}; my $residue1 = $at1->{residue}; my $residue2 = $at2->{residue}; # todo here is a CAVEAT # i ignore sequential connectivity here return 0 if ($residue1 ne $residue2); # todo here important actually use connectivity # and spectrum based connectivity too if (length($atom1) == 1 and length($atom2) == 1) { # here we check amide H&N my $poss = join('',sort qw(H N)); my $is = join('',sort($atom1,$atom2)); next if $is ne $poss; } elsif (length($atom1) >= 2 and length($atom2) >=2) { # expect atom1 to be proton if ($atom1 !~ /^H/ and $atom2 =~ /^H/) { my $tmp = $atom1; $atom1 = $atom2; $atom2 = $tmp; } # another caveat here: # only look at protons connected to other atoms # cannot both be nuclei of the same kind elsif ($atom1 =~ /^H/ and $atom2 =~ /^H/) { return 0; } elsif ($atom1 !~ /^H/ and $atom2 !~ /^H/) { return 0; } my @a1 = split /|/, $atom1; my @a2 = split /|/, $atom2; return 0 if ($a1[1] ne $a2[1]); if (scalar(@a2) > 2) { # proton's name is shorter then that of heteroatom return 0 if scalar(@a1) <= 2; # additional name modifiers are different # in proton and heteroatom return 0 if $a1[2] != $a2[2]; } } else { # one atom has one letter name and the other one # has longer name # these cannot be connected return 0; } return 1; } sub bio::aminoacid::atoms { my $aa = bio::protein::sequence::aatype(shift()); my %nom = bio::protein::aminoacid::atom::nomenclature(); $aa = bio::protein::aminoacid::format(-style=>'one_letter', -res=>$aa); my $list = $nom{$aa}; my @atoms; foreach my $item (@$list) { push @atoms, $item->{iupac}; } return \@atoms; } sub bio::aminoacid::atomnames { my %nom = bio::protein::aminoacid::atom::nomenclature(); my %AAA; foreach my $aatype (keys %nom) { my $atoms = $nom{$aatype}; foreach my $atom (@$atoms) { my %new = %$atom; my $name = delete $new{'iupac'}; $AAA{$aatype}{$name} = \%new; } } return \%AAA; } sub bio::aminoacid::badatom { my ($atom,$aa) = @_; my %nom = bio::protein::aminoacid::atom::nomenclature(); my $atoms = $nom{$aa}; my @iupac; foreach my $a (@$atoms) { push @iupac, $a->{iupac}; } if ($atom =~ /^[^#]+[#]$/) { my $sub = '\d+'; $atom =~ s/#/$sub/; #util::dump($atom); } elsif( $atom =~ /^[^%]+[%]$/) { my $sub = '\d'; $atom =~ s/%/$sub/; } my $regex = "^$atom\$"; return 0 if util::array::somematch(\@iupac,$regex); return 1; } sub bio::aminoacid::getambigatoms # the purpose of this subroutine is determine list of diastereotopic # atoms related to the current one { my ($atom) = @_; my $residue = $atom->{residue}; my $atomname = $atom->{atom}; my ($aacode, $num) = bio::protein::sequence::idres($residue); if (bio::aminoacid::badatom($atomname,$aacode)) { print "ooops $atomname @ $residue\n"; } my %nom = bio::protein::aminoacid::atom::nomenclature(); my $atoms = $nom{$aacode}; my %ambig_regex; foreach my $a (@$atoms) { $ambig_regex{$a->{iupac}} = $a->{ambig_regex}; } my $regex = $ambig_regex{$atomname}; if ($atomname =~ /^(.*)([23])$/ and not defined $regex) { $regex = "$1\[23\]"; #app::message("ok? $regex ; $atomname @ $aacode"); } elsif (not defined $regex) { #app::message("ok? $atomname @ $aacode"); # here we beleive that atom is not prochiral and such return [$atom]; } elsif ($atomname !~ /[23]$/ and defined $regex) { #app::message("strange: ambig regex is $regex atom ". # "$atomname residue $aacode"); } else { #app::message("ok: ambig regex is $regex atom ". # "$atomname residue $aacode"); } # expand regex with topipp here todo importand replace this hack with # better mechanism my @atomnames = sparky::atom::parse($regex); my @out; foreach my $name (@atomnames) { my %new = %$atom; $new{atom}=$name; push @out, \%new; } return \@out; } package bio::protein::aminoacid::atom; sub bio::protein::aminoacid::atom::scramble { my ($atom,$residue) = @_; # some code to deal with input assignments like # HB#|HA1|HB2 but where atoms still belong to # the same residue if ($atom =~ /\|/) { my @iparts = split /\|/, $atom; my @oparts; foreach my $part (@iparts) { $part = bio::protein::aminoacid::atom::scramble( $part,$residue ); push @oparts, split(/\|/,$part); } # retain only unique atom labels my %atoms; for (my $i=0; $i < scalar(@oparts); $i++) { $atoms{$oparts[$i]}=$i; } my @atoms = keys %atoms; # mark unusable labels that match any regex-like labels # such as HB# and HD1% # and remove redundant regex like labels # HB% and HB# (HB% must be removed) my $R = '[#%]'; ATOMi: for (my $i=0; $i < scalar(@atoms); $i++) { my $ai = $atoms[$i]; next if $atoms{$ai} == -1; next if $ai !~ /$R$/; my $regexi = xplor::wk2regex($ai); ATOMj: for (my $j=0; $j < scalar(@atoms); $i++) { my $aj = $atoms[$j]; next if $i == $j; next if $atoms{$aj} == -1; if ($aj !~ /$R$/) { $atoms{$aj} = -1 if $aj =~ /^$regexi$/; } else { my $regexj = xplor::wk2regex($aj); if ($regexj =~ /^$regexi$/) { $atoms{$aj} = -1; next ATOMj; } elsif ($regexi =~ /^$regexj$/) { $atoms{$ai} = -1; next ATOMi; } } } } # collect reduced list of unique labels # while maintaining their order to distinguish pairs like # HB2|HB1 and HB1|HB2 my %out = reverse %atoms; delete $out{-1}; # remove the last redundant element my @list = sort keys %out; # save the order # output atoms in the same order they came in return join('|',map {$out{$_}} @list); } return $atom if $atom =~ /[#%]$/; return sparky::atom::topipp($atom) if $atom =~ /[\[\]]/; # last case: unfold prochiral and otherwise equivalent # atoms as ambiguous my %nom = bio::protein::aminoacid::atom::nomenclature(); my $atoms = $nom{$residue}; my $alt = undef; foreach my $a (@$atoms) { if ($atom eq $a->{iupac}) { $alt = $a->{ambig_regex}; last; } } return $atom if not defined $alt; my @alt = sparky::atom::parse($alt); my @obits; foreach my $alt (@alt) { push @obits, $alt if $alt ne $atom; } return join('|',$atom, @obits); } sub bio::protein::aminoacid::getatomtable { my $nom = shift; my %nomenclature = bio::protein::aminoacid::atom::nomenclature(); my %table; foreach my $aa (keys %nomenclature) { my $atoms = $nomenclature{$aa}; $table{$aa} = []; foreach my $atom (@$atoms) { my $name = $atom->{$nom}; push @{$table{$aa}}, $name; } } return \%table; } sub references{ return ( 'sykes94csi'=> '@article{wishart1994ccs, title={{The 13 C Chemical-Shift Index: A simple method for the identification of protein secondary structure using 13 C chemical-shift data}}, author={Wishart, D.S. and Sykes, B.D.}, journal={Journal of Biomolecular NMR}, volume={4}, number={2}, pages={171--180}, year={1994}, publisher={Springer}, url={http://dx.doi.org/10.1007/BF00175245} }' ); } sub bio::protein::aminoacid::atom::shifts{ return ( 'A'=>{ 'CA'=>{'rcoil'=>52.5,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>177.1,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>19.0,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'C'=>{ 'CA'=>{'rcoil'=>58.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>174.8,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>28.6,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'Cox'=>{ 'comment'=>'oxidized form of cysteine', 'CA'=>{'rcoil'=>58.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>174.8,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>28.6,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'D'=>{ 'CA'=>{'rcoil'=>54.1,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>177.2,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>40.8,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'E'=>{ 'CA'=>{'rcoil'=>56.7,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>176.1,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>29.7,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'F'=>{ 'CA'=>{'rcoil'=>57.9,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>175.8,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>39.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'G'=>{ 'CA'=>{'rcoil'=>45.0,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>173.6,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>,'rcoilerr'=>,'source'=>'sykes94csi'}, }, 'H'=>{ 'CA'=>{'rcoil'=>55.8,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>175.1,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>32.0,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'I'=>{ 'CA'=>{'rcoil'=>62.6,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>176.9,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>37.5,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'K'=>{ 'CA'=>{'rcoil'=>56.7,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>176.5,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>32.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'L'=>{ 'CA'=>{'rcoil'=>55.7,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>177.1,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>41.9,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'M'=>{ 'CA'=>{'rcoil'=>56.6,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>175.8,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>32.8,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'N'=>{ 'CA'=>{'rcoil'=>53.6,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>175.1,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>39.0,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'P'=>{ 'CA'=>{'rcoil'=>62.9,'rcoilerr'=>4.0,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>176.0,'rcoilerr'=>4.0,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>31.7,'rcoilerr'=>4.0,'source'=>'sykes94csi'}, }, 'Q'=>{ 'CA'=>{'rcoil'=>56.2,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>176.3,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>30.1,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'R'=>{ 'CA'=>{'rcoil'=>56.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>176.5,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>30.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'S'=>{ 'CA'=>{'rcoil'=>58.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>173.7,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>62.7,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'T'=>{ 'CA'=>{'rcoil'=>63.1,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>175.2,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>68.1,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'V'=>{ 'CA'=>{'rcoil'=>63.0,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>177.1,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>31.7,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'W'=>{ 'CA'=>{'rcoil'=>57.8,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>175.8,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>28.3,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, 'Y'=>{ 'CA'=>{'rcoil'=>58.6,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, 'C'=>{'rcoil'=>175.7,'rcoilerr'=>0.5,'source'=>'sykes94csi'}, 'CB'=>{'rcoil'=>38.7,'rcoilerr'=>0.7,'source'=>'sykes94csi'}, }, ); } sub bio::protein::aminoacid::atom::nomenclature # data from table $bank/atom_nom.tbl that was lifted online { return ( 'F' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HD', 'pdb' => 'HD1', 'xplor' => 'HD1', 'ambig_regex' => 'HD[12]', 'iupac' => 'HD1' }, { 'cara' => 'HD', 'pdb' => 'HD2', 'xplor' => 'HD2', 'ambig_regex' => 'HD[12]', 'iupac' => 'HD2' }, { 'cara' => 'HE', 'pdb' => 'HE1', 'xplor' => 'HE1', 'ambig_regex' => 'HE[12]', 'iupac' => 'HE1' }, { 'cara' => 'HE', 'pdb' => 'HE2', 'xplor' => 'HE2', 'ambig_regex' => 'HE[12]', 'iupac' => 'HE2' }, { 'cara' => 'HZ', 'pdb' => 'HZ', 'xplor' => 'HZ', 'iupac' => 'HZ' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD1', 'pdb' => 'CD1', 'xplor' => 'CD1', 'ambig_regex' => 'CD[12]', 'iupac' => 'CD1' }, { 'cara' => 'CD2', 'pdb' => 'CD2', 'xplor' => 'CD2', 'ambig_regex' => 'CD[12]', 'iupac' => 'CD2' }, { 'cara' => 'CE1', 'pdb' => 'CE1', 'xplor' => 'CE1', 'ambig_regex' => 'CE[12]', 'iupac' => 'CE1' }, { 'cara' => 'CE2', 'pdb' => 'CE2', 'xplor' => 'CE2', 'ambig_regex' => 'CE[12]', 'iupac' => 'CE2' }, { 'cara' => 'CZ', 'pdb' => 'CZ', 'xplor' => 'CZ', 'iupac' => 'CZ' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'S' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG', 'pdb' => 'HG', 'xplor' => 'HG', 'iupac' => 'HG' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'OG', 'pdb' => 'OG', 'xplor' => 'OG', 'iupac' => 'OG' } ], 'T' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB', 'pdb' => 'HB', 'xplor' => 'HB', 'iupac' => 'HB' }, { 'cara' => 'HG1', 'pdb' => 'HG1', 'xplor' => 'HG1', 'iupac' => 'HG1' }, { 'cara' => 'HG2', 'pdb' => '1HG2', 'xplor' => 'HG21', 'ambig_regex' => 'HG2[123]', 'iupac' => 'HG21' }, { 'cara' => 'HG2', 'pdb' => '2HG2', 'xplor' => 'HG22', 'ambig_regex' => 'HG2[123]', 'iupac' => 'HG22' }, { 'cara' => 'HG2', 'pdb' => '3HG2', 'xplor' => 'HG23', 'ambig_regex' => 'HG2[123]', 'iupac' => 'HG23' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG2', 'pdb' => 'CG2', 'xplor' => 'CG2', 'iupac' => 'CG2' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'OG1', 'pdb' => 'OG1', 'xplor' => 'OG1', 'iupac' => 'OG1' } ], 'N' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HD21', 'pdb' => '2HD2', 'xplor' => 'HD21', 'ambig_regex' => 'HD2[12]', 'iupac' => 'HD21' }, { 'cara' => 'HD22', 'pdb' => '1HD2', 'xplor' => 'HD22', 'ambig_regex' => 'HD2[12]', 'iupac' => 'HD22' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'ND2', 'pdb' => 'ND2', 'xplor' => 'ND2', 'iupac' => 'ND2' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'OD1', 'pdb' => 'OD1', 'xplor' => 'OD1', 'iupac' => 'OD1' } ], 'K' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG2', 'pdb' => '1HG', 'xplor' => 'HG2', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG2' }, { 'cara' => 'HG3', 'pdb' => '2HG', 'xplor' => 'HG1', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG3' }, { 'cara' => 'HD2', 'pdb' => '1HD', 'xplor' => 'HD2', 'ambig_regex' => 'HD[23]', 'iupac' => 'HD2' }, { 'cara' => 'HD3', 'pdb' => '2HD', 'xplor' => 'HD1', 'ambig_regex' => 'HD[23]', 'iupac' => 'HD3' }, { 'cara' => 'HE2', 'pdb' => '1HE', 'xplor' => 'HE2', 'ambig_regex' => 'HE[23]', 'iupac' => 'HE2' }, { 'cara' => 'HE3', 'pdb' => '2HE', 'xplor' => 'HE1', 'ambig_regex' => 'HE[23]', 'iupac' => 'HE3' }, { 'cara' => 'HZ1', 'pdb' => '1HZ', 'xplor' => 'HZ1', 'ambig_regex' => 'HZ[123]', 'iupac' => 'HZ1' }, { 'cara' => 'HZ2', 'pdb' => '2HZ', 'xplor' => 'HZ2', 'ambig_regex' => 'HZ[123]', 'iupac' => 'HZ2' }, { 'cara' => 'HZ3', 'pdb' => '3HZ', 'xplor' => 'HZ3', 'ambig_regex' => 'HZ[123]', 'iupac' => 'HZ3' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD', 'pdb' => 'CD', 'xplor' => 'CD', 'iupac' => 'CD' }, { 'cara' => 'CE', 'pdb' => 'CE', 'xplor' => 'CE', 'iupac' => 'CE' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'NZ', 'pdb' => 'NZ', 'xplor' => 'NZ', 'iupac' => 'NZ' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'Y' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HD', 'pdb' => 'HD1', 'xplor' => 'HD1', 'ambig_regex' => 'HD[12]', 'iupac' => 'HD1' }, { 'cara' => 'HD', 'pdb' => 'HD2', 'xplor' => 'HD2', 'ambig_regex' => 'HD[12]', 'iupac' => 'HD2' }, { 'cara' => 'HE', 'pdb' => 'HE1', 'xplor' => 'HE1', 'ambig_regex' => 'HE[12]', 'iupac' => 'HE1' }, { 'cara' => 'HE', 'pdb' => 'HE2', 'xplor' => 'HE2', 'ambig_regex' => 'HE[12]', 'iupac' => 'HE2' }, { 'cara' => 'HH', 'pdb' => 'HH', 'xplor' => 'HH', 'iupac' => 'HH' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD1', 'pdb' => 'CD1', 'xplor' => 'CD1', 'ambig_regex' => 'CD[12]', 'iupac' => 'CD1' }, { 'cara' => 'CD2', 'pdb' => 'CD2', 'xplor' => 'CD2', 'ambig_regex' => 'CD[12]', 'iupac' => 'CD2' }, { 'cara' => 'CE1', 'pdb' => 'CE1', 'xplor' => 'CE1', 'ambig_regex' => 'CE[12]', 'iupac' => 'CE1' }, { 'cara' => 'CE2', 'pdb' => 'CE2', 'xplor' => 'CE2', 'ambig_regex' => 'CE[12]', 'iupac' => 'CE2' }, { 'cara' => 'CZ', 'pdb' => 'CZ', 'xplor' => 'CZ', 'iupac' => 'CZ' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'OH', 'pdb' => 'OH', 'xplor' => 'OH', 'iupac' => 'OH' } ], 'E' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG2', 'pdb' => '1HG', 'xplor' => 'HG2', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG2' }, { 'cara' => 'HG3', 'pdb' => '2HG', 'xplor' => 'HG1', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG3' }, { 'cara' => 'HE2', 'pdb' => 'HE2', 'xplor' => 'HE2', 'iupac' => 'HE2' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD', 'pdb' => 'CD', 'xplor' => 'CD', 'iupac' => 'CD' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'OE1', 'pdb' => 'OE1', 'xplor' => 'OE1', 'iupac' => 'OE1' }, { 'cara' => 'OE2', 'pdb' => 'OE2', 'xplor' => 'OE2', 'iupac' => 'OE2' } ], 'V' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB', 'pdb' => 'HB', 'xplor' => 'HB', 'iupac' => 'HB' }, { 'cara' => 'HG1', 'pdb' => '1HG1', 'xplor' => 'HG11', 'ambig_regex' => 'HG[12][123]', 'iupac' => 'HG11' }, { 'cara' => 'HG1', 'pdb' => '2HG1', 'xplor' => 'HG12', 'ambig_regex' => 'HG[12][123]', 'iupac' => 'HG12' }, { 'cara' => 'HG1', 'pdb' => '3HG1', 'xplor' => 'HG13', 'ambig_regex' => 'HG[12][123]', 'iupac' => 'HG13' }, { 'cara' => 'HG2', 'pdb' => '1HG2', 'xplor' => 'HG21', 'ambig_regex' => 'HG[12][123]', 'iupac' => 'HG21' }, { 'cara' => 'HG2', 'pdb' => '2HG2', 'xplor' => 'HG22', 'ambig_regex' => 'HG[12][123]', 'iupac' => 'HG22' }, { 'cara' => 'HG2', 'pdb' => '3HG2', 'xplor' => 'HG23', 'ambig_regex' => 'HG[12][123]', 'iupac' => 'HG23' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG1', 'pdb' => 'CG1', 'xplor' => 'CG1', 'ambig_regex' => 'CG[12]', 'iupac' => 'CG1' }, { 'cara' => 'CG2', 'pdb' => 'CG2', 'xplor' => 'CG2', 'ambig_regex' => 'CG[12]', 'iupac' => 'CG2' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'Q' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG2', 'pdb' => '1HG', 'xplor' => 'HG2', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG2' }, { 'cara' => 'HG3', 'pdb' => '2HG', 'xplor' => 'HG1', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG3' }, { 'cara' => 'HE21', 'pdb' => '2HE2', 'xplor' => 'HE21', 'ambig_regex' => 'HE2[12]', 'iupac' => 'HE21' }, { 'cara' => 'HE22', 'pdb' => '1HE2', 'xplor' => 'HE22', 'ambig_regex' => 'HE2[12]', 'iupac' => 'HE22' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD', 'pdb' => 'CD', 'xplor' => 'CD', 'iupac' => 'CD' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'NE2', 'pdb' => 'NE2', 'xplor' => 'NE2', 'iupac' => 'NE2' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'OE1', 'pdb' => 'OE1', 'xplor' => 'OE1', 'iupac' => 'OE1' } ], 'M' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG2', 'pdb' => '1HG', 'xplor' => 'HG2', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG2' }, { 'cara' => 'HG3', 'pdb' => '2HG', 'xplor' => 'HG1', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG3' }, { 'cara' => 'HE', 'pdb' => '1HE', 'xplor' => 'HE1', 'ambig_regex' => 'HE[123]', 'iupac' => 'HE1' }, { 'cara' => 'HE', 'pdb' => '2HE', 'xplor' => 'HE2', 'ambig_regex' => 'HE[123]', 'iupac' => 'HE2' }, { 'cara' => 'HE', 'pdb' => '3HE', 'xplor' => 'HE3', 'ambig_regex' => 'HE[123]', 'iupac' => 'HE3' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CE', 'pdb' => 'CE', 'xplor' => 'CE', 'iupac' => 'CE' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'SD', 'pdb' => 'SD', 'xplor' => 'SD', 'iupac' => 'SD' } ], 'C' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG', 'pdb' => 'HG', 'xplor' => 'HG', 'iupac' => 'HG' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'SG', 'pdb' => 'SG', 'xplor' => 'SG', 'iupac' => 'SG' } ], 'L' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG', 'pdb' => 'HG', 'xplor' => 'HG', 'iupac' => 'HG' }, { 'cara' => 'HD1', 'pdb' => '1HD1', 'xplor' => 'HD11', 'ambig_regex' => 'HD[12][123]', 'iupac' => 'HD11' }, { 'cara' => 'HD1', 'pdb' => '2HD1', 'xplor' => 'HD12', 'ambig_regex' => 'HD[12][123]', 'iupac' => 'HD12' }, { 'cara' => 'HD1', 'pdb' => '3HD1', 'xplor' => 'HD13', 'ambig_regex' => 'HD[12][123]', 'iupac' => 'HD13' }, { 'cara' => 'HD2', 'pdb' => '1HD2', 'xplor' => 'HD21', 'ambig_regex' => 'HD[12][123]', 'iupac' => 'HD21' }, { 'cara' => 'HD2', 'pdb' => '2HD2', 'xplor' => 'HD22', 'ambig_regex' => 'HD[12][123]', 'iupac' => 'HD22' }, { 'cara' => 'HD2', 'pdb' => '3HD2', 'xplor' => 'HD23', 'ambig_regex' => 'HD[12][123]', 'iupac' => 'HD23' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD1', 'pdb' => 'CD1', 'xplor' => 'CD1', 'ambig_regex' => 'CD[12]', 'iupac' => 'CD1' }, { 'cara' => 'CD2', 'pdb' => 'CD2', 'xplor' => 'CD2', 'ambig_regex' => 'CD[12]', 'iupac' => 'CD2' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'A' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB', 'pdb' => '1HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[123]', 'iupac' => 'HB1' }, { 'cara' => 'HB', 'pdb' => '2HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[123]', 'iupac' => 'HB2' }, { 'cara' => 'HB', 'pdb' => '3HB', 'xplor' => 'HB3', 'ambig_regex' => 'HB[123]', 'iupac' => 'HB3' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'W' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HD1', 'pdb' => 'HD1', 'xplor' => 'HD1', 'iupac' => 'HD1' }, { 'cara' => 'HE1', 'pdb' => 'HE1', 'xplor' => 'HE1', 'iupac' => 'HE1' }, { 'cara' => 'HE3', 'pdb' => 'HE3', 'xplor' => 'HE3', 'iupac' => 'HE3' }, { 'cara' => 'HZ2', 'pdb' => 'HZ2', 'xplor' => 'HZ2', 'iupac' => 'HZ2' }, { 'cara' => 'HZ3', 'pdb' => 'HZ3', 'xplor' => 'HZ3', 'iupac' => 'HZ3' }, { 'cara' => 'HH2', 'pdb' => 'HH2', 'xplor' => 'HH2', 'iupac' => 'HH2' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD1', 'pdb' => 'CD1', 'xplor' => 'CD1', 'iupac' => 'CD1' }, { 'cara' => 'CD2', 'pdb' => 'CD2', 'xplor' => 'CD2', 'iupac' => 'CD2' }, { 'cara' => 'CE2', 'pdb' => 'CE2', 'xplor' => 'CE2', 'iupac' => 'CE2' }, { 'cara' => 'CE3', 'pdb' => 'CE3', 'xplor' => 'CE3', 'iupac' => 'CE3' }, { 'cara' => 'CZ2', 'pdb' => 'CZ2', 'xplor' => 'CZ2', 'iupac' => 'CZ2' }, { 'cara' => 'CZ3', 'pdb' => 'CZ3', 'xplor' => 'CZ3', 'iupac' => 'CZ3' }, { 'cara' => 'CH2', 'pdb' => 'CH2', 'xplor' => 'CH2', 'iupac' => 'CH2' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'NE1', 'pdb' => 'NE1', 'xplor' => 'NE1', 'iupac' => 'NE1' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'X' => [ { 'cara' => 'H1', 'pdb' => '1H', 'xplor' => 'HT1', 'iupac' => 'H1' }, { 'cara' => 'H2', 'pdb' => '2H', 'xplor' => 'HT2', 'iupac' => 'H2' }, { 'cara' => 'H3', 'pdb' => '3H', 'xplor' => 'HT3', 'iupac' => 'H3' }, { 'cara' => 'H\'\'', 'pdb' => '', 'xplor' => '', 'iupac' => 'H\'\'' }, { 'cara' => 'O1', 'pdb' => 'O', 'xplor' => 'OT1', 'iupac' => 'O1' }, { 'cara' => 'O2', 'pdb' => 'OXT', 'xplor' => 'OT2', 'iupac' => 'O2' } ], 'P' => [ { 'cara' => 'H2', 'pdb' => 'H2', 'xplor' => 'HT2', 'ambig_regex' => 'H[23]', 'iupac' => 'H2' }, { 'cara' => 'H3', 'pdb' => 'H1', 'xplor' => 'HT1', 'ambig_regex' => 'H[23]', 'iupac' => 'H3' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG2', 'pdb' => '1HG', 'xplor' => 'HG2', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG2' }, { 'cara' => 'HG3', 'pdb' => '2HG', 'xplor' => 'HG1', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG3' }, { 'cara' => 'HD2', 'pdb' => '1HD', 'xplor' => 'HD2', 'ambig_regex' => 'HD[23]', 'iupac' => 'HD2' }, { 'cara' => 'HD3', 'pdb' => '2HD', 'xplor' => 'HD1', 'ambig_regex' => 'HD[23]', 'iupac' => 'HD3' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD', 'pdb' => 'CD', 'xplor' => 'CD', 'iupac' => 'CD' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'H' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HD1', 'pdb' => 'HD1', 'xplor' => 'HD1', 'iupac' => 'HD1' }, { 'cara' => 'HD2', 'pdb' => 'HD2', 'xplor' => 'HD2', 'iupac' => 'HD2' }, { 'cara' => 'HE1', 'pdb' => 'HE1', 'xplor' => 'HE1', 'iupac' => 'HE1' }, { 'cara' => 'HE2', 'pdb' => 'HE2', 'xplor' => 'HE2', 'iupac' => 'HE2' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD2', 'pdb' => 'CD2', 'xplor' => 'CD2', 'iupac' => 'CD2' }, { 'cara' => 'CE1', 'pdb' => 'CE1', 'xplor' => 'CE1', 'iupac' => 'CE1' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'ND1', 'pdb' => 'ND1', 'xplor' => 'ND1', 'iupac' => 'ND1' }, { 'cara' => 'NE2', 'pdb' => 'NE2', 'xplor' => 'NE2', 'iupac' => 'NE2' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'D' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HD2', 'pdb' => '', 'xplor' => '', 'iupac' => 'HD2' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' }, { 'cara' => 'OD1', 'pdb' => 'OD1', 'xplor' => 'OD1', 'iupac' => 'OD1' }, { 'cara' => 'OD2', 'pdb' => 'OD2', 'xplor' => 'OD2', 'iupac' => 'OD2' } ], 'I' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB', 'pdb' => 'HB', 'xplor' => 'HB', 'iupac' => 'HB' }, { 'cara' => 'HG12', 'pdb' => '1HG1', 'xplor' => 'HG12', 'ambig_regex' => 'HG1[23]', 'iupac' => 'HG12' }, { 'cara' => 'HG13', 'pdb' => '2HG1', 'xplor' => 'HG11', 'ambig_regex' => 'HG1[23]', 'iupac' => 'HG13' }, { 'cara' => 'HG2', 'pdb' => '1HG2', 'xplor' => 'HG21', 'ambig_regex' => 'HG2[123]', 'iupac' => 'HG21' }, { 'cara' => 'HG2', 'pdb' => '2HG2', 'xplor' => 'HG22', 'ambig_regex' => 'HG2[123]', 'iupac' => 'HG22' }, { 'cara' => 'HG2', 'pdb' => '3HG2', 'xplor' => 'HG23', 'ambig_regex' => 'HG2[123]', 'iupac' => 'HG23' }, { 'cara' => 'HD1', 'pdb' => '1HD1', 'xplor' => 'HD11', 'ambig_regex' => 'HD1[123]', 'iupac' => 'HD11' }, { 'cara' => 'HD1', 'pdb' => '2HD1', 'xplor' => 'HD12', 'ambig_regex' => 'HD1[123]', 'iupac' => 'HD12' }, { 'cara' => 'HD1', 'pdb' => '3HD1', 'xplor' => 'HD13', 'ambig_regex' => 'HD1[123]', 'iupac' => 'HD13' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG1', 'pdb' => 'CG1', 'xplor' => 'CG1', 'iupac' => 'CG1' }, { 'cara' => 'CG2', 'pdb' => 'CG2', 'xplor' => 'CG2', 'iupac' => 'CG2' }, { 'cara' => 'CD1', 'pdb' => 'CD1', 'xplor' => 'CD1', 'iupac' => 'CD1' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'R' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA', 'pdb' => 'HA', 'xplor' => 'HA', 'iupac' => 'HA' }, { 'cara' => 'HB2', 'pdb' => '1HB', 'xplor' => 'HB2', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB2' }, { 'cara' => 'HB3', 'pdb' => '2HB', 'xplor' => 'HB1', 'ambig_regex' => 'HB[23]', 'iupac' => 'HB3' }, { 'cara' => 'HG2', 'pdb' => '1HG', 'xplor' => 'HG2', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG2' }, { 'cara' => 'HG3', 'pdb' => '2HG', 'xplor' => 'HG1', 'ambig_regex' => 'HG[23]', 'iupac' => 'HG3' }, { 'cara' => 'HD2', 'pdb' => '1HD', 'xplor' => 'HD2', 'ambig_regex' => 'HD[23]', 'iupac' => 'HD2' }, { 'cara' => 'HD3', 'pdb' => '2HD', 'xplor' => 'HD1', 'ambig_regex' => 'HD[23]', 'iupac' => 'HD3' }, { 'cara' => 'HE', 'pdb' => 'HE', 'xplor' => 'HE', 'iupac' => 'HE' }, { 'cara' => 'HH11', 'pdb' => '1HH1', 'xplor' => 'HH11', 'ambig_regex' => 'HH[12][12]', 'iupac' => 'HH11' }, { 'cara' => 'HH12', 'pdb' => '2HH1', 'xplor' => 'HH12', 'ambig_regex' => 'HH[12][12]', 'iupac' => 'HH12' }, { 'cara' => 'HH21', 'pdb' => '1HH2', 'xplor' => 'HH21', 'ambig_regex' => 'HH[12][12]', 'iupac' => 'HH21' }, { 'cara' => 'HH22', 'pdb' => '2HH2', 'xplor' => 'HH22', 'ambig_regex' => 'HH[12][12]', 'iupac' => 'HH22' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'CB', 'pdb' => 'CB', 'xplor' => 'CB', 'iupac' => 'CB' }, { 'cara' => 'CG', 'pdb' => 'CG', 'xplor' => 'CG', 'iupac' => 'CG' }, { 'cara' => 'CD', 'pdb' => 'CD', 'xplor' => 'CD', 'iupac' => 'CD' }, { 'cara' => 'CZ', 'pdb' => 'CZ', 'xplor' => 'CZ', 'iupac' => 'CZ' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'NE', 'pdb' => 'NE', 'xplor' => 'NE', 'iupac' => 'NE' }, { 'cara' => 'NH1', 'pdb' => 'NH1', 'xplor' => 'NH1', 'ambig_regex' => 'NH[12]', 'iupac' => 'NH1' }, { 'cara' => 'NH2', 'pdb' => 'NH2', 'xplor' => 'NH2', 'ambig_regex' => 'NH[12]', 'iupac' => 'NH2' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ], 'G' => [ { 'cara' => 'H', 'pdb' => 'H', 'xplor' => 'HN', 'iupac' => 'H' }, { 'cara' => 'HA2', 'pdb' => '1HA', 'xplor' => 'HA2', 'ambig_regex' => 'HA[23]', 'iupac' => 'HA2' }, { 'cara' => 'HA3', 'pdb' => '2HA', 'xplor' => 'HA1', 'ambig_regex' => 'HA[23]', 'iupac' => 'HA3' }, { 'cara' => 'C', 'pdb' => 'C', 'xplor' => 'C', 'iupac' => 'C' }, { 'cara' => 'CA', 'pdb' => 'CA', 'xplor' => 'CA', 'iupac' => 'CA' }, { 'cara' => 'N', 'pdb' => 'N', 'xplor' => 'N', 'iupac' => 'N' }, { 'cara' => 'O', 'pdb' => 'O', 'xplor' => 'O', 'iupac' => 'O' } ] ); } sub bio::protein::aminoacid::atom::translate # translate aminoacid atom names between different formats { my %arg = @_; my $from = $arg{-from} or die "internal: need 'from' type"; my $to = $arg{-to} or die "internal: need 'to' type"; my $atom = $arg{-atom} or die "internal: need atom name"; my $aa = $arg{-aminoacid} or die "internal: need aminoacid"; my $names = $arg{-table}; my $num; # todo this is suboptimal hash is read many many times if ($atom =~ /\|/) { my @atoms = split /\|/, $atom; my @oatoms; foreach my $atom (@atoms) { my %newarg = %arg; $newarg{-atom} = $atom; push @oatoms, bio::protein::aminoacid::atom::translate( %newarg); } return join('|',@oatoms); } if (not defined $names) { $names = {bio::protein::aminoacid::atom::nomenclature()}; } if ($atom =~ /[#%]/) { if ($atom =~ /\d/) { #app::warning("atom $atom in residue $aa may not have translated correctly"); } return $atom; } if ($aa !~ /^([A-Z])([\d]+)?$/) { if ($aa =~ /^\d+$/) { app::error("there is some error in the program that needs to be fixed...\n". "are you reading cara file? try converting one without pseudo spin systems"); } app::error("one letter uppercase format expected for aminoacids"); } else { $aa = $1; $num = $2; } if ($aa eq 'X') { app::error("X is not allowed for aminoacid names"); } if (not defined $names->{$aa}) { app::error("aminoacid $aa unknown"); } my $AA = $names->{$aa}; my @formats = keys %{$AA->[0]}; if (not util::array::isin($to,\@formats)) { app::error("unknown atom naming scheme $to"); } if (not util::array::isin($from,\@formats)) { app::error("unknown atom naming scheme $from"); } # rehash atom names foreach my $a (@$AA) { my $new_name = $a->{$to}; if ($a->{$from} eq $atom) { if ($from eq 'cara') { my $diff = length($new_name) - length($atom); if ($diff == 0) { return $new_name; } elsif ($diff == 1) { $new_name =~ s/.$//; return $new_name.'#'; } else { die "possibly error in the code lead to this, talk to the programmer"; } } else { return $new_name; } } } $aa = bio::protein::aminoacid::format(-res=>$aa,-style=>'three_letter'); app::error("atom $atom is not allowed for residue $aa".$num. ' '. "in $from style"); return undef; } package bio::protein::aminoacid; sub bio::protein::aminoacid::format # assumes aminoacid either in one letter or two letter style # all uppercase letters { my %arg = @_; my $res = $arg{-res} || die "internal: residue expected"; my $style = $arg{-style} || die "internal: style expected"; my $num; if ($res !~ /^([^\d]+)(\d+)?/) { app::error("unexpected residue $res"); } my $num = $2; my $aa = $1; if ($style eq 'three_letter') { return $aa if $aa =~ /^[A-Z]{3}$/;# todo use util::array::isin here if ($aa =~ /^([A-Z])(\d+)?$/) { $aa = $1; my $pos = $2; my %aa = reverse %AA; if (scalar keys %aa != scalar keys %AA) { die "internal: aminoacid hash corrupt"; } $aa = $aa{$aa} or app::error("unexpected residue '$res'"); #push @out, $aa . $pos; } else { app::error("unexpected residue label '$res'"); } } elsif ($style eq 'one_letter') { return $aa if $aa =~ /^[A-Z]$/; if ($aa !~ /^[A-Z]{3}$/) { app::error("unexpected residue label '$res'"); } $aa = $AA{$aa} or app::error("unexpected residue '$res'"); } elsif ($style eq 'full') { die "internal: not implemented"; } else { die "internal: unknown style '$style'"; } return $aa.$num; } package bio::structure; sub bio::structure::getprotons #res is list of residues (array of residue numbers) #return list of all protons with their internal data { my ($str,$res) = @_; #here deal with particular residues only my @out; my $atoms = bio::structure::getatoms($str,$res); foreach my $a (@$atoms) { push @out, $a if $a->{atom} =~ /^H/; } return \@out; } sub bio::structure::getatoms #$res is array of residue numbers #if it is not defined then atoms from all residues will be returned { my ($str,$res) = @_; my $m = $str->[0]; my @aa = keys %$m; my @out; foreach my $aa (@aa) { $aa =~ /(\d+)$/; my $resno = $1; next if defined $res and not util::array::isin($resno,$res); my $atoms = $m->{$aa}; my @names = keys %{$atoms}; foreach my $n (@names) { $atoms->{$n}{atom} = $n; $atoms->{$n}{residue} = $aa; push @out, $atoms->{$n}; } } return \@out; } sub bio::structure::calcbonds #calculate bonds within a molecule based on the first model { my $str = shift; my $cutoff = 1.5;#covalent bond cutoff my $model = $str->[0]; my @atoms; foreach my $aa (keys %$model) { foreach my $at (keys %{$model->{$aa}}) { push @atoms, $model->{$aa}{$at}; } } #my @sorted = sort {$a->{coor}[0]<=>$b->{coor}[0]} @atoms; my $n; foreach my $a (@atoms) { foreach my $b (@atoms) { if (util::array::dist($a->{coor},$b->{coor})<=$cutoff) { push @{$a->{bonds}},$b; $n++; } } } } sub bio::structure::addnmrass #add nmr chemical shifts from the assignment table to the first structural model (with index 0) { my ($str, $ass) = @_; my $atom_table = bio::structure::getatomtable($str); my %atoms; foreach my $a (@$ass) { my $natoms = bio::aminoacid::globatoms($a,'none',$atom_table); foreach my $na (@$natoms) { my $key = $na->{atom} . '@' . $na->{residue}; if (not defined $atoms{$key}) { $atoms{$key} = $na; } push @{$atoms{$key}{shifts}}, $a->{'shift'}; } } foreach my $key (keys %atoms) { my $atom = $atoms{$key}; my $residue = $atom->{residue}; my $atomname = $atom->{atom}; my $shifts = $atom->{'shifts'}; $str->[0]{$residue}{$atomname}{'shifts'} = $shifts; } } sub bio::structure::getatomlist # returns array of arrays (that are either single atom or three atoms of the methyl group) { my $str = shift; my $mdl = $str->[0]; my @aa = keys %$mdl; #todo maybe this code can go to a separate function rehashing #aminoacid atom nomenclature my %nom = bio::protein::aminoacid::atom::nomenclature(); foreach my $A (keys %nom) { my @atoms = @{$nom{$A}}; my %atoms; foreach my $atom (@atoms) { my $iupac = $atom->{iupac}; $atoms{$iupac} = $atom; } $nom{$A} = \%atoms; } my %groups; foreach my $a (@aa) #$a is aminoacid from the pdb model { my ($res,$num) = bio::protein::sequence::idres($a); my $ratoms = $nom{$res};#atoms from nomenclature my @atoms = keys %{$mdl->{$a}};#atoms from model foreach my $atom (@atoms) { next if ($atom !~ /^H/); if (bio::aminoacid::atom::ismethyl([bio::atom::new($a,$atom)])) { my $groupname = $nom{$res}{$atom}{ambig_regex}; $groupname =~ tr/[]//; my $groupname = $a . '@' . $groupname; my $group = $groups{$groupname}; if (not defined $group) { my @methylgroup; push @methylgroup, bio::atom::new($a,$atom); $groups{$groupname} = \@methylgroup; } else { my $group = $groups{$groupname}; push @$group, bio::atom::new($a,$atom); } } else { # use fake one-atom group my @fakegroup; push @fakegroup, bio::atom::new($a,$atom); $groups{$a . '@' . $atom} = \@fakegroup; } } } return [values %groups]; } sub bio::structure::findcontacts #all atoms must be in iupac format { my ($str,$group1,$cutoff,$nstr,$ave) = @_; return undef if (not defined $group1); # get list of atoms/methyl groups my $atoms = bio::structure::getatomlist($str); my @contacts; foreach my $group2 (@$atoms) { my @dist; for(my $j=0;$j[$j]{atom}; my $residue2 = $group2->[$j]{residue}; for (my $i=0;$i[$i]{atom}; my $residue1 = $group1->[$i]{residue}; my $dist = bio::structure::getmindist( $str, {'atom'=>$atom1,'residue'=>$residue1}, {'atom'=>$atom2,'residue'=>$residue2}); if (defined $dist) { push @dist, $dist; } else { app::warning('distance between atoms '. internal::atom::text($group1->[$i]). ' and '. internal::atom::text($group2->[$j]). 'not found in pdb file(s)'); } } } my $DIST; if ($ave eq 'SUM') { foreach my $d (@dist) { $DIST += $d**(-6); } $DIST = $DIST**(-1/6); } elsif ($ave eq 'AVE') { $DIST = util::array::mean(\@dist); } if ($DIST <= $cutoff) { push @contacts, {atoms=>$group2,dist=>$DIST}; } } return \@contacts; } sub bio::structure::getMinDistByLabel { my ($str,$atom1,$atom2,$ave) = @_; my $group1 = xplor::globatoms($atom1); my $group2 = xplor::globatoms($atom2); return undef if (not (defined $group1 and defined $group2)); util::assert(sub{scalar(@$group1)>0},"no atoms found for $atom1"); util::assert(sub{scalar(@$group2)>0},"no atoms found for $atom2"); my @dist; for (my $i=0;$i[$i]{atom}; my $residue1 = $group1->[$i]{residue}; my $atom1 = bio::protein::aminoacid::atom::translate( -from=>'xplor', -to=>'iupac', -atom=>$atom1, -aminoacid=>$residue1 ); for(my $j=0;$j[$j]{atom}; my $residue2 = $group2->[$j]{residue}; my $atom2 = bio::protein::aminoacid::atom::translate( -from=>'xplor', -to=>'iupac', -atom=>$atom2, -aminoacid=>$residue2 ); my $dist = bio::structure::getmindist( $str, {'atom'=>$atom1,'residue'=>$residue1}, {'atom'=>$atom2,'residue'=>$residue2}); if (defined $dist) { push @dist, $dist; } else { app::warning('distance between atoms '. internal::atom::text($group1->[$i]). ' and '. internal::atom::text($group2->[$j]). 'not found in pdb file(s)'); } } } my $DIST; if ($ave eq 'SUM') { foreach my $d (@dist) { $DIST += $d**(-6); } $DIST = $DIST**(-1/6); } elsif ($ave eq 'AVE') { $DIST = util::array::mean(\@dist); } return $DIST; } sub bio::structure::getmindist { my ($str, $ass1, $ass2, $ave) = @_; my $min = 999999999; if (ref $ass1 eq 'ARRAY' or ref $ass2 eq 'ARRAY') { if (not defined $ave) { die "averaging method must be defined for ...getmindist"; } if ($ave ne 'SUM') { die "only 'SUM' method allowed for distance averaging for now"; } $ass1 = util::array::insure($ass1); $ass2 = util::array::insure($ass2); my @dist; foreach my $as1 (@$ass1) { foreach my $as2 (@$ass2) { my $dist = bio::structure::getmindist($str,$as1,$as2); return undef if not defined $dist; push @dist, $dist; } } return undef if scalar @dist == 0; my $DIST; foreach my $dist (@dist) { $DIST += $dist**(-6); } return $DIST**(-1/6); } my $aa1 = $ass1->{residue}; my $atom1 = $ass1->{atom}; my $aa2 = $ass2->{residue}; my $atom2 = $ass2->{atom}; foreach my $mod (@$str) # todo here there is an assumption that # all models contain same atoms { my $coor1 = $mod->{$aa1}{$atom1}{coor}; my $coor2 = $mod->{$aa2}{$atom2}{coor}; return undef if not defined $coor1 or not defined $coor2; my ($x1,$y1,$z1) = @$coor1; my ($x2,$y2,$z2) = @$coor2; my ($dx,$dy,$dz) = ($x1-$x2,$y1-$y2,$z1-$z2); my $dist = sqrt($dx*$dx+$dy*$dy+$dz*$dz); $min = $dist if $dist < $min; } return $min; } sub bio::structure::writepdb { my ($models,$file,$fmt) = @_; $fmt = 'pdb' if not defined $fmt; # todo here this is kind of bad: i actually have # translation method # bio::protein::aminoacid::atom::translate my %nom = bio::protein::aminoacid::atom::nomenclature(); my %iupac2fmt; foreach my $aa (keys %nom) { foreach my $atom (@{$nom{$aa}}) { my $fmt = $atom->{$fmt}; my $iupac = $atom->{iupac}; $iupac2fmt{$aa}{$iupac} = $fmt; } } my $tr = \%iupac2fmt; my @lines; my $modnum = 1; foreach my $model (@$models) { if (scalar(@$models) > 1) { push @lines, sprintf "MODEL%9d\n", $modnum++; } push @lines, pdb_model_lines($model,$tr); if (scalar(@$models) > 1) { push @lines, "ENDMDL\n"; } } push @lines, "END\n"; util::writefile(\@lines,$file); app::message("file $file written"); return; sub pdb_model_lines { my ($model,$tr) = @_; my @residues = sort {util::cmpaa($a,$b)} keys %$model; my @pdblines; my $atomnum = 1; foreach my $residue (@residues) { my @atoms = sort keys %{$model->{$residue}}; foreach my $atom (@atoms) { print "$residue\t$atom\n"; my ($x,$y,$z) = @{$model->{$residue}{$atom}{coor}}; my $bfac = $model->{$residue}{$atom}{bfac}; my ($aa,$resnum) = bio::protein::sequence::idres($residue); # dont reorder the following two lines $atom = $tr->{$aa}{$atom}; my $aa = bio::protein::aminoacid::format( -res=>$aa,-style=>'three_letter'); my $line = 'ATOM'; $line .= sprintf "%7d", $atomnum++; my $atomfmt; if (length($atom) == 4 or $atom =~ /^\d/) { $atomfmt = " %-4s" } elsif (length($atom) <= 3) { $atomfmt = " %-3s"; } else { die "strange atom name $atom"; } $line .= sprintf $atomfmt, $atom; $line .= sprintf "%4s", $aa; $line .= sprintf "%6d", $resnum; $line .= ' '; $line .= sprintf "%8.3f",$x; $line .= sprintf "%8.3f",$y; $line .= sprintf "%8.3f",$z; $line .= sprintf " 1.00%6.2f\n",$bfac; push @pdblines, $line; } } return @pdblines; } } sub bio::structure::pdb::zerobfactors { my $str = shift; foreach my $mod (@$str) { foreach my $res (keys %$mod) { foreach my $at (keys %{$mod->{$res}}) { $mod->{$res}{$at}{bfac} = undef; } } } } sub bio::structure::pdb::setbfactor { my ($str,$residue,$atom,$bfactor) = @_; foreach my $mod (@$str) { $mod->{$residue}{$atom}{bfac} = $bfactor; } } sub bio::structure::readpdb { my ($file,$nom) = @_; $nom = 'pdb' if not defined $nom; util::assert(sub {util::array::isin($nom, ['xplor','pdb','iupac','none'])}, "unexpected format '$nom'; only pdb, ". "xplor and iupac formats are recognized"); if (ref $file eq 'ARRAY') { my @models; foreach my $f (@$file) { my $new = bio::structure::readpdb($f,$nom); push @models, @$new; } return \@models; } #app::message("reading pdb file $file ..."); my @lines = grep !/^REMARK/, util::readfile($file); my $type = 'single'; if (grep /^MODEL/, @lines) { $type = 'multiple'; } my @models; my $i=0; while (@lines) { my $line; my $stop = 'ATOM'; if ($type eq 'multiple') { $stop = 'MODEL'; } $line = shift(@lines) until ( $line =~ /^$stop/ or scalar(@lines) == 0 ); last if scalar(@lines) == 0; my @mdl; if ($type eq 'single') { # that line was first atom, save it push @mdl, $line; } do { $line = shift @lines; push @mdl, $line; } until (endline($lines[0],$type) or scalar (@lines) == 0); push @models, \@mdl; last if scalar(@lines) == 0; } sub endline { my ($line,$type) = @_; if ($type eq 'single') { return 1 if $line !~ /^ATOM/; } elsif ($type eq 'multiple') { return 1 if $line =~ /^ENDMDL/; } else { die "internal: unknown type '$type'"; } return 0; } my @MODELS; my %nom = bio::protein::aminoacid::atom::nomenclature(); foreach my $mdl (@models) { my %mdl; while ($mdl->[0] =~ /^ATOM/) { my $line = shift @$mdl; my $atom = util::line::peel(substr $line, 12, 4); my $aa = util::line::peel(substr $line, 17, 3); my $num = util::line::peel(substr $line, 22, 4); my $x = util::line::peel(substr $line, 30, 8); my $y = util::line::peel(substr $line, 38, 8); my $z = util::line::peel(substr $line, 46, 8); my $bfac = util::line::peel(substr $line, 60, 6); #skip terminal atoms next if $atom =~ /^HT/; next if $atom =~ /^OT/; my $newatom = $atom; if ($nom ne 'none') { $aa = bio::protein::aminoacid::format( -res=>$aa, -style=>'one_letter'); $newatom = bio::protein::aminoacid::atom::translate( -atom=>$atom, -aminoacid=>$aa, -from=>$nom, -to=>'iupac', -table=>\%nom); if (not defined $newatom) { app::error("file $file does not agree with ". "$nom style"); } } $mdl{$aa.$num}{$newatom}{coor} = [$x,$y,$z]; $mdl{$aa.$num}{$newatom}{bfac} = $bfac; } push @MODELS, \%mdl; } if (!@MODELS) { app::error("pdb file does not contain MODEL entries ". "other formats not supported yet"); } #app::message(util::line::reportnum(scalar(@MODELS),'model'). ' read'); return \@MODELS; } sub bio::structure::getatomtable { my $models = shift; my @res = keys %{$models->[0]}; my %atom_table; foreach my $r (@res) { $atom_table{$r} = [keys %{$models->[0]{$r}}]; } return \%atom_table; } sub bio::structure::getresnumbers { my $models = shift; my @res = keys %{$models->[0]}; my @out; foreach my $r (@res) { $r =~ s/[a-zA-Z]//g; push @out, $r; } return [sort {$a<=>$b} @out]; } package bio::protein::sequence; sub bio::protein::sequence::idres #id residue { my $aa = shift; if ($aa=~/^([A-Z]{1,3})(\d+)$/) { return ($1, $2); } app::error("unexpected residue $aa"); } sub bio::protein::sequence::number { my ($junk,$num) = bio::protein::sequence::idres(shift()); return $num; } sub bio::protein::sequence::aatype { my ($aatype,$junk) = bio::protein::sequence::idres(shift()); return $aatype; } sub bio::protein::sequence::badresidue { # todo important maybe do something more advanced here my $aa = shift; return 1 if ($aa !~ /^([A-Z]{1,3})(\d+)$/); return 0; } sub bio::protein::sequence::format { my %arg = @_; my $seq = $arg{-seq} || die "internal: must proide ref to sequence"; my $style = $arg{-style} || die "internal: must provide style"; if (ref $seq ne 'ARRAY') { die "internal: must be reference to seq array"; } my @in = @$seq; my $first = undef; if ($in[0] =~ /^\d+$/) { $first = shift @in; } my @out; foreach my $aa (@in) { # todo fix error checking here # actually do I want it??? # only accept valid aminoacids if ($aa =~ /^([^\d]+)(\d+)?$/) { my $res = $1; my $num = $2; $arg{-res} = $res; $res = bio::protein::aminoacid::format(%arg); push @out, $res . $num; } else { app::error("unexpected residue '$aa'"); } } if (defined $first) { unshift @out, $first; } return @out; } sub bio::protein::sequence::sort { my @seq = @_; return sort {compare($a,$b)} @seq; sub compare { my ($r1,$r2) = @_; $r1 =~ /^[A-Z]+(\d+)$/;my $num1 = $1; $r2 =~ /^[A-Z]+(\d+)$/;my $num2 = $1; return $num1<=>$num2; } } sub bio::protein::sequence::read # return plain array or residues residue first element is number of residues # residue numbering starts at index 1 (not 0 like in arrays) # will read peptide sequence from single file { my $file = shift; if (ref $file eq 'ARRAY') { util::error("only one peptide sequence file allowed"); } elsif (ref $file) { die "internal: wrong parameter type in bio::protein::sequence::read"; } my @seq = util::readfile($file); foreach (@seq) { chomp; s/\s//g; } my $seq = join('',@seq); my @aa = split(/\B/,$seq); unshift @aa, scalar(@aa); return @aa; } package app; sub app::message { my $msg = shift; use File::Basename; my $prog = basename $0; print util::line::wrap(-line=>$msg); print "\n"; } sub app::output { my $msg = shift; use File::Basename; my $prog = basename $0; print util::line::wrap(-line=>$msg); print "\n"; } sub app::errmessage { my $msg = shift; use File::Basename; my $prog = basename $0; print STDERR "\n"; print STDERR util::line::wrap(-line=>$msg); print STDERR "\n"; } sub app::die { app::error("code error: " . shift); } sub app::error { my $msg = shift; if (ref $msg eq 'ARRAY') { if (scalar @$msg > 1) { app::errmessage("errors found:"); foreach my $m (@$msg) { app::errmessage(" $m"); } exit 1; } else { $msg = $msg->[0]; } } app::errmessage("error: $msg"); print STDERR "\n"; use Cwd; use File::Temp qw(tempfile tempdir); use File::Copy qw(copy); if ($ERROR_REPORTING) { my $temp_dir = tempdir(DIR=>glob($ERROR_DIR)); open F, ">$temp_dir/command.log"; print F "\nCommand: "; print F $COMMAND . "\n\n"; print F "User " . getlogin() . "\n\n"; print F "Time " . localtime() . "\n\n"; print F "Working directory " . getcwd() . "\n\n"; close F; foreach my $file (@FILES) { copy($file,$temp_dir); } #my ($tar_fh,$tar_fname) = tempfile(); #$tar_fname =~ /([^\/]+)$/; #$tar_fname = $1; #close($tar_fh); #system("tar czf $tar_fname $temp_dir"); #system("mv $tar_fname $ERROR_DIR/$tar_fname.tar"); #email::send(address=>$PROGRAMMER_EMAIL,subject=>'bug in nut',body=>$msg."file $tar_fname"); #unlink($fname); chmod 0755, $temp_dir; chmod 0644, glob("$temp_dir/*"); chown((getpwnam($programmer))[2,3],glob("$temp_dir/*")); chown((getpwnam($programmer))[2,3],$temp_dir); } exit 1; } sub app::warning { my $msg = shift; if (ref $msg eq 'ARRAY') { foreach my $m (@$msg) { app::errmessage("warning: $m\n"); } } app::errmessage("warning: $msg\n"); } package util::assert; sub util::assert::nosuchfile { my $files = shift; $files = util::array::insure($files); my @err; foreach my $file (@$files) { if (-d $file) { push @err,"$file is a directory"; } if (-f $file) { push @err, "file $file already exists, cannot overwrite"; } } if (scalar(@err) > 0) { app::error(\@err); } } package util::scalar; sub util::scalar::set_lower{ #set lower value to $subj my ($subj,$cand) = @_; if ($cand < $$subj){ $$subj = $cand; } } sub util::scalar::greek_ord { my $let = shift; $let =~ tr/[a-z]/[A-Z]/; my %greek = ('N'=>0,'A'=>1,'B'=>2,'G'=>3,'D'=>4,'E'=>5,'Z'=>6,'H'=>8); my @let = keys %greek; if (not util::array::isin($let,\@let)) { app::error("unexpected character $let in atom name"); } return $greek{$let}; } sub util::scalar::sign { my $in = shift; return undef if not util::scalar::isnum($in); if ($in >= 0) { return '+'; } else { return '-'; } } sub util::scalar::setif { my ($ref,$val) = @_; $$ref = $val if (defined $val and $val ne /^\s*$/); return; } sub util::scalar::inrange { my ($val,$range) = @_; my @range = sort {$a<=>$b} @$range; return 1 if ($val >= $range[0] and $val <= $range[1]); return 0; } sub util::scalar::isnum { my $what = shift; return 1 if $what =~ /^$NUMPATTERN$/; return 0; } package util; sub util::filebasename { my $file = shift; use File::Basename; $file = basename($file); $file =~ s/^([^\.]+)\..*$/\1/; return $file; } sub util::cmpaa { my ($a1,$a2) = @_; $a1 =~ /^[A-Z](\d+)/; my $n1 = $1; $a2 =~ /^[A-Z](\d+)/; my $n2 = $1; return $n1<=>$n2; } sub util::assert # first parameter must be code reference # last parameter - error message to print if assertion fails # remaining parameters are passed to code contained in the # first parameter # example #util::assert( # $util::array::{allin}, # [map {$ax->{$_}{label}} keys %$ax], # \@required, # "axes ".util::word::join(@required).'are required '. # 'for spectrum nnoe' #); { my ($code,@par) = @_; my $msg = pop @par; if ($code->(@par)==0) { app::error($msg); } } sub util::readdata { my $file = shift; my $xml = XML::Dumper->new(); my $data = $xml->xml2pl($file); return $data; } sub util::savedata { my ($data, $out, $par) = @_; my $dump = new XML::Dumper; my $xml = $dump->pl2xml($data); util::writefile($xml,$out, $par); } sub util::print { use Data::Dumper; print Dumper(\@_), "\n"; } sub util::dump { util::print(@_); exit; } sub util::readfile { my $name = shift; $name = util::array::insure($name); my @lines; foreach my $file (@$name) { push @FILES, glob($file); open F, "<$file" or app::error("cannot open $file for reading. $!"); push @lines ,; } return @lines; } sub util::writefile { my ($what,$file,$par) = @_; if ($file eq 'STDOUT') { foreach my $line (@$what) { die "internal: util::writefile can only save lines" if (ref $line); print $line; } return; } if (not defined $par) { util::assert::nosuchfile($file); } elsif ($par ne 'force') { die "internal: unknown parameter $par to util::writefile\n"; } open F, ">$file" or die "could not open file for writing: $!"; if (ref $what eq 'ARRAY') { foreach my $line (@$what) { if (ref $line) { die "internal: util::writefile can only". "save arrays (by ref) or scalars"; } print F $line; } } elsif (not ref $what) { print F $what; } close F; } package util::hash; # returns one if they are same # and 0 if they are not the same sub util::hash::equal { #if same elements of both are absent its ok #but one has it and other doesnt its not ok #will check nested hashes, but will get stuck in infinite recursion if #there are cirular references my ($h1,$h2,$list) = @_; if (not defined $list) { my @l1 = sort keys %$h1; my @l2 = sort keys %$h2; return 0 if not util::array::equal(\@l1,\@l2); $list = \@l1; } foreach my $key (@$list) { if (ref $h1->{$key} eq 'HASH' and ref $h2->{$key} eq 'HASH') { return 0 if util::hash::equal($h1->{$key},$h2->{$key}) == 0; } else { return 0 if not defined $h1->{$key} xor not defined $h2->{$key}; next if (not defined $h1->{$key} and not defined $h2->{$key}); return 0 if $h1->{$key} ne $h2->{$key}; } } return 1; } package util::array; sub util::array::common_prefix { my $in = shift; my $prefix = $in->[0]; ELEM: foreach my $e (@$in) { my $len = length($prefix); for (my $i = $len; $i>0;$i--) { my $sube = substr($e,0,$i); my $subp = substr($prefix,0,$i); if ($sube eq $subp) { $prefix = $subp; next ELEM; } } return ''; } return $prefix; } sub util::array::dist #calculate distance between two points in cartesian space represented by #two arrays of dimensionality 3 (which is not enforced) { my ($a1, $a2) = @_; my $dist; for (my $i=0;$i<3;$i++) { my $delta = $a2->[$i] - $a1->[$i]; $dist += $delta*$delta; } return sqrt($dist); } sub util::array::squeeze { my ($ar,$msg) = @_; if (scalar(@$ar) == 0) { app::error($msg); } return shift @$ar; } sub util::array::ff { my ($ar,$regex) = @_; while (@$ar) { my $el = shift @$ar; if ($el =~ /$regex/) { unshift @$ar, $el; return 1; } } return undef; } sub util::array::grabff { my ($ar,$regex) = @_; my @out; while (@$ar) { my $el = shift @$ar; if ($el =~ /$regex/) { unshift @$ar, $el; return \@out; } push @out, $el; } return undef; } sub util::array::tuples # generate all possible tuples from # provided array references # give output as a ref to array of arrays # elements in the output arrays should be # arranged in the same order as input { my @in = @_; if (scalar @in == 1) { my @out; foreach my $el (@{$in[0]}) { push @out, [$el]; } return \@out; } else { my $ar = pop @in; my @out; foreach my $el (@$ar) { my $prefixes = util::array::tuples(@in); foreach my $prefix (@$prefixes) { push @$prefix, $el; push @out, $prefix; } } return \@out; } } sub util::array::sum { my $ar = shift; my $sum; foreach my $el (@$ar) { $sum += $el; } return $sum; } sub util::array::extract # extract array elements within inclusive bounds { my ($ar,$lo,$hi) = @_; my @out; return [] if $lo > $hi; for (my $i=$lo;$i<=$hi;$i++) { push @out, $ar->[$i]; } return \@out; } sub util::array::max { my $ar = shift; my $max = $ar->[0]; foreach my $e (@$ar) { next if not defined $e; $max = $e if $e > $max; } return $max; } sub util::array::min { my $ar = shift; my $min = $ar->[0]; foreach my $e (@$ar) { $min = $e if $e < $min; } return $min; } sub util::array::mean { my $ar = shift; my $mean = util::array::sum($ar)/scalar(@$ar); return $mean; } sub util::array::std { my $ar = shift; my $mean = util::array::mean($ar); my $std; foreach my $el (@$ar) { $std += ($el-$mean)**2; } return sqrt($std/scalar(@$ar)); } sub util::array::thosein { my ($ar,$tmpl) = @_; my @out; foreach my $elem (@$ar) { push @out, $elem if util::array::isin($elem,$tmpl); } return @out; } sub util::array::isnum { my $ar = shift; foreach my $elem (@$ar) { return 0 if not util::scalar::isnum($elem); } return 1; } sub insure # if value is not an array reference # put it inside an array and give that ref { my $it = shift; return undef if not defined $it; if (ref $it ne 'ARRAY') { return [$it]; } return $it; } sub hash { my ($keys,$values) = @_; if (scalar(@$keys) != scalar(@$values)) { die "internal: dimension of key and ". "value arrays dont match"; } my $lim = scalar(@$keys); my %hash; for (my $i=0; $i < $lim; $i++) { $hash{$keys->[$i]} = $values->[$i]; } return %hash; } sub isa { my $ar = shift; return 1 if ref $ar eq 'ARRAY'; return 0; } sub util::array::isin { my ($what,$where) = @_; foreach my $try (@$where) { return 1 if ($try eq $what); } return 0; } sub equal { my ($a1,$a2) = @_; return 0 if @$a1 != @$a2; my $num = @$a1; for (my $i=0; $i<$num; $i++) { return 0 if $a1->[$i] ne $a2->[$i]; } return 1; } sub allmatch { my ($what,$such) = @_; foreach my $item (@$what) { return 0 if $item !~ /$such/; } return 1; } sub somematch { my ($what,$such) = @_; foreach my $item (@$what) { return 1 if $item =~ /$such/; } return 0; } sub util::array::somein #tests if at least one item from first array belongs to the second { my ($what, $ar) = @_; my @ar = @$ar; my @what = @$what; my @res; foreach my $item (@what) { foreach my $test (@ar) { if ($test eq $item) { return 1; } } } return 0; } sub allin #tests if all items from first array belong to the second { my ($what, $ar) = @_; my @ar = @$ar; my @what = @$what; my @res; foreach my $item (@what) { my $found = 0; foreach my $test (@ar) { if ($test eq $item) { $found = 1; last; } } return 0 if !$found; } return 1; } sub find # return index of element # function requires sorted array on input { my ($ar,$element) = @_; } sub util::array::merge # merge arrays into one and return reference for the latter { my @in = @_; my @out; foreach $a (@in) { push @out, @$a; } return \@out; } sub util::array::uniq # return ref to array containing only unique element # i.e. discard repeated elements # returned array is not sorted { my $in = shift; my %out; foreach my $el (@$in) { $out{$el}++; } return [keys %out]; } sub util::array::init{ #initialize array my ($len,$value) = @_; my @out; for (my $i=0; $i<$len; $i++){ push @out, $value; } return @out; } package util::line; sub toupper { my $line = shift; $line =~ tr/a-z/A-Z/; return $line; } sub findline # find one line from array of lines by pattern # quit with error if there is more then one line # found { my ($pattern,$lines) = @_; my @out = grep(/$pattern/,@$lines); if (scalar(@out) > 1) { die "internal: more then one line matches pattern in util::line::findline"; } elsif (scalar(@out) == 0) { return undef; } elsif (scalar(@out) == 1) { return $out[0]; } else { die "internal: it's my doomsday"; } } # remove leading and trailing spaces and remove end of line sub peel { my $line = shift; chomp $line; $line =~ s/^\s+(.*)/\1/; $line =~ s/\s+$//; return $line; } sub util::line::parserange # parse lists like 4,7,10-15 to arrays [4,7,10,11,12,13,14,15] # if list is not parseable, return undef # normally return reference to an array { my $in = shift; my @bits = split /,/, $in; my %out; foreach my $bit (@bits) { if ($bit =~ /^\d+$/) { $out{$bit} = 1; } elsif ($bit =~ /^(\d+)\-(\d+)$/) { my ($start,$end) = sort {$a<=>$b} ($1,$2); for (my $i = $start; $i<=$end; $i++) { $out{$i} = 1; } } else { return undef; } } return [sort {$a<=>$b} keys %out]; } # print list keeping language in mind # like requred arguments 1 and 2 are missing # of required argument q is missing # depending on number of arguments sub util::line::reportlist { my ($list,$out,$postfix) = @_; $_ = "'$_'" foreach @$list; if (@$list > 2) { my $last = pop @$list; my $start = join (', ', @$list); $out .= 's ' if defined $out; $out .= "$start and $last"; $out .= ' are ' if defined $postfix; } elsif (@$list == 2) { $out .= 's ' if defined $out; $out .= "$list->[0] and $list->[1]"; $out .= ' are ' if defined $postfix; } else #only one element { $out .= " $list->[0]"; $out .= ' is ' if defined $postfix; } $out .= $postfix if defined $postfix; return $out; } # print list of alternatives keeping language in mind # like requred arguments 1 and 2 are missing # of required argument q is missing # depending on number of arguments sub util::line::reportopt { my ($list,$out,$postfix) = @_; if (@$list > 2) { my $last = pop @$list; my $start = join (', ', @$list); $out .= 's ' if defined $out; $out .= "$start or $last"; $out .= ' are ' if defined $postfix; } elsif (@$list == 2) { $out .= 's ' if defined $out; $out .= "$list->[0] or $list->[1]"; $out .= ' are ' if defined $postfix; } else #only one element { $out .= " $list->[0]"; $out .= ' is ' if defined $postfix; } $out .= $postfix if defined $postfix; return $out; } sub util::line::reportnum { my ($num,$word) = @_; return "$num $word" if $num =~ /1$/; return "$num $word\s"; } sub util::line::wrap # wrap '-line' text by '-max' number of columns and leave '-indent' spaces # in the beginning of every line # if '-firstword' is true, remove first word from the '-line' # and print it in place of blank spaces in the first line # while trying to maintain indentation starting from the second word in # the '-line' # # if -indent_prefix is defined, the -indent is ignored and # the provided prefix is used to indent each line instead { my %arg = @_; my $max = $arg{-max} || 80; my $indent = $arg{-indent} || 8; my $line = $arg{-line}; my $firstword = $arg{-firstword} || undef; my $indent_characters = $arg{-indent_prefix} || undef; if (defined $indent_characters) { $indent = length($indent_characters); } return undef if $line eq ''; die "internal: must provide -line argument to util::line::wrap" if not defined $line; #my $endln = '(?:\012|\015|\015\012)'; #$line =~ s/$endln/ /g; # todo make sure to preserve end of lines #$line =~ s/\s+/ /g; my $len = $indent; my @original_words = split /[ \t\r\f]+/, $line; #make sure that all words are no longer then $indent+$max my @words; my $max_word_length = $max - $indent; foreach my $original_word (@original_words) { while (length($original_word) > $max_word_length) { my $word = substr($original_word,0,$max_word_length); push @words, $word; substr($original_word,0,$max_word_length) = ''; } if ($original_word !~ /^\s*$/) { push @words, $original_word; } } my $out; if (defined $firstword) { my $word = shift @words; $out = $word . blank($indent - length($word),$indent_characters); } else { $out = blank($indent,$indent_characters); } foreach my $word (@words) { if ($word =~ /\n/) { my @bits = split /\n/, $word; $out .= addword(shift @bits,\$len,$max,$indent,$indent_characters); foreach my $bit (@bits) { $out .= "\n" . util::line::blank($indent,$indent_characters) . $bit . " "; $len = $indent + length($bit); } } else { $out .= addword($word, \$len, $max, $indent,$indent_characters); } } sub addword { my ($word,$len,$max,$indent,$indent_characters) = @_; my $out = ''; if ($$len + length($word) + 1 < $max) { $out .= "$word "; $$len += length($word) + 1; } else { $out .= "\n"; $out .= blank($indent,$indent_characters); $out .= $word . ' '; $$len = $indent + length($word) + 1; } return $out; } return $out; } sub blank # return blank line containing $num empty spaces { my ($num,$cheat) = @_; if (defined $cheat) { return $cheat; } my $out; for (my $i=0; $i<$num;$i++) { $out .= ' '; } return $out; } package util::word; sub util::word::join { my @words = @_; if (ref $words[0] eq 'ARRAY') { @words = @{$words[0]}; } my $num = scalar @words; if ($num == 1) { return $words[0]; } elsif ($num == 2) { return "$words[0] and $words[1]"; } elsif ($num > 2) { my $last = pop @words; return join(', ',@words) . ' and ' . $last; } return ''; } package toolkit::validate; # procedural package to nmr data validation; sub resonances { my $res = shift; # need sequence } sub ass #sanity check for peak assignments { my @files = glob '*.save'; my @peaks; foreach my $file (@files) { my $peaks = sparky::readpeaks($file); push @peaks, @$peaks; } print "\n... ", scalar(@peaks), " peaks read \n"; toolkit::validate::peaks(\@peaks); } sub peaks #performs sanity check for peak assignments #if second argument is provided, only warnings will be printed to STDERR #otherwise verbose report will be printed to STDOUT #if there is any error, it will be remembered for each peak { my $peaks = shift; my $silent = shift; my %spectra; my $err; #todo check against sequence too!!! foreach my $peak (@$peaks) { $spectra{$peak->{spectrum}}++; next if not defined $peak->{ass}; #collect residues involved my %res; foreach my $ass (@{$peak->{ass}}) { $res{$ass->{residue_label}}++; } #allow only two residues involved in peaks in <4D spectra if (scalar(keys %res) > 2 && scalar(@{$peak->{ass}}) < 4) { $err++; print STDERR "Warning: peak at ", join('|',@{$peak->{pos}}), " in spectrum $peak->{spectrum} ", "has >2 resid.: '", join("', '",keys(%res)),"'\n"; push @{$peak->{errors}}, 'more then 2 residues in ass'; } #find relationships of involved residues #todo potentially can validate assignments schemes #against type of experiment #also may validate atom and residue names, #chemical shift sanity, deviation in resonance assignments } return if $silent; print "\nspectra processed:\n\n ", join("\n ",keys %spectra), "\n\n"; if (!$err) { print "sanity check passed\n"; } else { print "$err errors found\n"; } print "\n"; } package cppn; sub cppn::pseudoatoms { return qw(H N ND NE NZ NH HA HB HG HD HE SD C CA CB CG CD CE CZ); } sub cppn::readass # read cppn resonance list file and return reference to an array # containing resonance information { my ($in) = @_; my @lines = util::readfile($in); #read atom names table my $atom_list = shift @lines; my @atom_names = split /\s+/, $atom_list; my $ex = shift @atom_names;#useless exclamation mark if ($ex ne '!'){ app::warning("first line in $in should start with the exclamation mark?"); } my @resonances; my @errors; my $natoms = scalar(@atom_names); foreach my $line (@lines) { $line = util::line::peel($line); my @bits = split /\s+/,$line; my $num = shift @bits; my $aatype = shift @bits; #handle tabulated atoms my $catom = 0; for (my $i=0; $i<$natoms;$i++){ my $bit = $bits[$i]; if ($bit ne '-'){ my %resonance; my $atom_name = $atom_names[$i]; $resonance{residue} = $aatype . $num; $resonance{atom} = $atom_names[$i]; my @shifts = split /,/, $bit; my $nshifts = scalar(\@shifts); $resonance{'shift'} = $bit; push @resonances, \%resonance; } } print "hohoh\n"; #handle extended atoms $catom = $natoms; while (defined $bits[$catom]){ my $bit = $bits[$catom]; $catom++; #app::error("extended atoms are not handled yet"); } } util::dump(\@resonances); exit; if (scalar @errors > 0) { map {app::errmessage($_)} @errors; exit; } return \@resonances; } package sparky::atom; sub sparky::atom::parse { my $atom = shift; my @pieces; if ($atom !~ /[\[\]]/)# dont parse if there are no brackets { return $atom; } while (length($atom)) { if ($atom =~ /^[^[]/) # doesnt start with [ { return 0 if $atom =~ /^\]/;# bad if starts with ] $atom =~ s/^([^\[]+)(.*)$/\2/; push @pieces, $1;# put stuff before braket to @pieces } else { if ($atom !~ s/^(\[[^\]]+\])(.*)$/\2/) { return 0; } push @pieces, $1;# put stuff within bracket to @pieces } } return 0 if util::array::somematch(\@pieces,'^([^\]]*\][^\[]*\[.*|[^\[\]]*[\[\]][\]\[]*)$'); #return 0 if util::array::somematch(\@pieces,'^(\].?\[|[^\[\]]?[\[\]][\]\[]?)$'); return 0 if (scalar @pieces == 0); my @options = p2o(@pieces); sub p2o # 'pieces to options format' conversion { my @pieces = @_; my $piece = shift @pieces; if (not defined $piece) { my $ar = ['']; return @$ar; } if ($piece =~ /\[/) { my @stuff = split /|/, $piece; shift @stuff; pop @stuff; my @out; foreach my $bit (@stuff) { my @new = map {$bit . $_} p2o(@pieces); push @out, @new; } return @out; } else { return map {$piece . $_} p2o(@pieces); } } } sub sparky::atom::topipp { my $atom = shift; my @parsed = sparky::atom::parse($atom); return join('|',@parsed); } package sparky; # this is non object oriented, traditional procedural style package sub sparky::saveass { my ($ass,$sparky) = @_; my %res; foreach my $a (@$ass) { push @{$res{$a->{residue}}}, $a; } my @out; push @out, " Group Atom Nuc Shift SDev Assignments\n"; push @out, "\n"; foreach my $aa (sort {util::cmpaa($a,$b)} keys %res) #sort aminoacids by residue number { #my @ambig; #foreach my $at (@{$res{$aa}}) #{ # my $atom = $at->{atom}; ## if ($atom =~ /\|/) # { # push @ambig, $at; # } #} #my %ambig; #foreach my $at (@ambig) #{ # my @atoms = split /\|/,$at->{atom}; # my $first = $atoms[0]; # $first =~ /^([A-Z]+)(.*)$/; # my $prefix = $1; # my $postfix = $2 # if (length($postfix) > 1) # { # $prefix .= substr($postfix,0,length($postfix)-1); # } # push @{$ambig{$prefix}}, $at; #} #foreach my $group (keys %ambig) #{ # my $items = $ambig{$group}; # if (scalar(@$items)>2) # { # my @atoms = map {$_->{atom}} @$items; # app::error("cannot translate ". # join(',',@atoms). # " to wildcard"); # } # $items->[0]{atom} = $group . '3'; # $items->[1]{atom} = $group . '2'; #} foreach my $at (sort {$a->{atom} cmp $b->{atom}} @{$res{$aa}}) { my $atom = $at->{atom}; my $freq = $at->{'shift'}; my $nuc; $atom =~ s/\|/,/g; next if not defined $freq; next if $freq > 300; $nuc = '13C' if $atom =~ /^C/; $nuc = '1H' if $atom =~ /^H/; $nuc = '15N' if $atom =~ /^N/; $atom =~ s/\#/*/g; my $line = sprintf "%6s%7s%5s%9.3f%7.3f%7d\n", $aa,$atom,$nuc,$freq,0.000,1; push @out, $line; } } util::writefile(\@out,$sparky); } sub sparky::writepeaks { my ($peaks,$file) = @_; my @lines; my $dim = internal::spectrum::dim($peaks->[0]); my $format = "%15s"; my @ass; for (my $i=0; $i<$dim; $i++) { $format .= "%7.3f"; # todo important here write out assingment properly push @ass, '?'; } $format .= "\n"; my $ass = join('-',@ass); push @lines, "tile line\n"; foreach my $peak (@$peaks) { my $pos = $peak->{'pos'}; my $line = sprintf $format, $ass, @$pos; push @lines, $line; } util::writefile(\@lines,$file); } # read sparky save file and save peaks into second argument by reference sub sparky::readpeaks { my $file = shift; my @lines = util::readfile($file); my $out; # for now only read one ... clause shift @lines while $lines[0] !~ //; shift @lines;# tag line pop @lines while $lines[$#lines] !~ //; pop @lines; # now only read ONE clause!!! die "unexpected format of sparky .save file" if grep(//,@lines); use File::Basename; $file = basename($file); $file =~ s/^([^\.]+)\..*$/\1/; while (@lines) { shift @lines while @lines && $lines[0] !~ /type peak/; last if !@lines; shift @lines; my @peaklines; push(@peaklines, shift @lines) while @lines && $lines[0] !~ /type peak/; while (@peaklines) { shift @peaklines while @peaklines && $peaklines[0] !~ /^id/; last if !@peaklines; $peaklines[0] =~ /^id\s+(\d+)$/; my $id = $1; shift @peaklines while @peaklines && $peaklines[0] !~ /^pos/; last if !@peaklines; my $line = $peaklines[0]; chomp $line; #parse this kind of line: #pos 49.185 124.411 9.480 $line =~ s/^pos\s+(.*)$/\1/; $line =~ s/([^\s])\s+$/\1/;#trailing space my @position = split /\s+/, $line; my %peak; $peak{pos} = \@position; $peak{spectrum} = $file; #todo: possibly parse these in the future too: #height 3020624.582 2876335.750 #linewidth 115.178 75.290 34.785 fit #integral 9.7639e+07 ga $line = util::line::findline('^height',\@peaklines); if (defined $line) { $line = util::line::peel($line); if ($line !~ /^height\s+($NUMPATTERN)\s+($NUMPATTERN)/) { print "here\n"; } $peak{intensity} = $2; } $line = util::line::findline('^integral',\@peaklines); if (defined $line) { $line = util::line::peel($line); $line =~ /^integral\s+($NUMPATTERN)\s+[a-zA-Z]+$/; $peak{integral} = $1; } #fr 0.035 #read in the assignment: #rs |L37|CA| |L37|NH| |L37|HN| shift @peaklines while @peaklines && $peaklines[0] !~ /^rs/; if (@peaklines) { $peaklines[0] =~ /^rs\s+(.*)$/; my $ass = $1; $ass =~ s/^\s*\|//; $ass =~ s/\|\s*$//; my @ass = split /\|\s+\|/, $ass; my @ASS; foreach my $ass (@ass) { my ($res,$atom) = split /\|/, $ass; my $ass_type = undef; $ass_type = ($res =~ /$ASSIGNED_FORMAT/)? 'assigned':$ass_type; $ass_type = ($res =~ /$PSEUDOASSIGNED_FORMAT/)? 'pseudoassigned':$ass_type; my $residue_name = $1 || undef; my $seq_pos = $2 || undef; if ($res =~ /$PSEUDOASSIGNED_ROOT_FORMAT/) { $seq_pos = 0; } push @ASS , {residue_label=>$res, atom=>$atom, resno=>$seq_pos, ass_type=>$ass_type, seq_pos=>$seq_pos, residue_name=>$residue_name }; } $peak{ass} = \@ASS;#assignment } push @$out, \%peak; } } return $out; } sub sparky::readass # read sparky resonance list file and return reference to an array # containing resonance information { # todo urgent i really dont like treatment of ambiguous assignments # and a very big problem is that i am doing atom name translations # here which i should not be doing my ($in,$mode,$nom) = @_; $nom = 'iupac' if not defined $nom; if (ref $in eq 'ARRAY') { app::error("can't read more then one ass table at a time"); } elsif (ref $in) { die "internal: wrong input parameter"; } my @lines = util::readfile($in); shift @lines; shift @lines; my @resonances; my @errors; foreach my $line (@lines) { chomp $line; $line =~ s/^\s+//; my @bits = split /\s+/, $line; my $residue = $bits[0]; if ($mode eq 'strict' and $nom ne 'none') { next if bio::protein::sequence::badresidue($residue); } my ($aatype,$num) = bio::protein::sequence::idres($residue); my $aatype = bio::protein::aminoacid::format( -res=>$residue, -style=>'one_letter' ); my @atoms = sparky::atom::parse($bits[1]); my @ATOMS; if ($nom eq 'none') #if nomenclature is 'none', then atom names are not to be translated { @ATOMS = @atoms; } else #if atoms are not in iupac nomenclature then translate to iupac #test atom names against the atom name database { foreach my $atom (@atoms) { if ($atom eq '0') { app::error("unrecognized atom '$bits[1]\@$bits[0]'"); } if ($nom ne 'iupac' or $nom ne 'none') { $atom = bio::protein::aminoacid::atom::translate( -from=>$nom, -to=>'iupac', -atom=>$atom, -aminoacid=>$aatype ); } if (bio::aminoacid::badatom($atom,$aatype)) { push @errors, "unexpected atom type $atom\@$residue in ". "sparky assignment table $in"; next; } push @ATOMS, $atom; } } my %resonance; $resonance{residue} = $aatype . $num; $resonance{atom} = join('|',@ATOMS); $resonance{'shift'} = $bits[3]; push @resonances, \%resonance; } if (scalar @errors > 0) { map {app::errmessage($_)} @errors; exit; } return \@resonances; } package xeasy; sub xeasy::readseq { my $file = shift; my @lines = util::readfile($file); chomp foreach @lines; my @lines = map { util::line::peel($_) } @lines; my @one = grep /^\w+$/, @lines; my @OUT; my $first_residue_number = 1; if (scalar(@one) == scalar(@lines)) #just residue names, no numbers { my @aa = grep /^[A-Z]{3}$/, @lines; if (scalar(@aa) != scalar(@lines)) { app::error("sequence in xeasy format only allows three-letter aminoacid codes"); } foreach my $res (@lines) { my $aa_name = $AA{$res}; if (not defined $aa_name) { app::error("could not understand residue code '$res' in file $file"); } push @OUT, $aa_name; } unshift @OUT, scalar(@OUT); } else { my @two = grep /^\w+\s+\w+$/, @lines; if (scalar(@two) == scalar(@lines)) #residue names and residue numbers { $first_residue_number = 999999999999; foreach my $line (@lines) { my ($res,$num) = split /\s+/, $line; my $aa_name = $AA{$res}; if (not defined $aa_name) { app::error("could not understand residue code '$res' in file $file"); } if ($num !~ /^\d+$/) { app::error("$num is not a valid residue number in file $file"); } if ($num < $first_residue_number) { $first_residue_number = $num; } $OUT[$num] = $aa_name; } $OUT[1] = scalar(@lines); if ($first_residue_number != 1) { app::error("the case where residues dont start from 1 is ". "not yet implemented, if you need this feature, ". "ask the programmer to fix it"); $OUT[2] = $first_residue_number; } } else { app::error("could not interprete file $file as xeasy protein sequence"); } } return \@OUT; } sub xeasy::readass # read xeasy assignment table from file and aminoacid sequence # for now no nomenclature support { my ($file,$seq,$nom) = @_; my @lines = util::readfile($file); #remove commented out and empty lines @lines = grep !/^#/, @lines; @lines = grep !/^\s*$/, @lines; @lines = map {util::line::peel($_)} @lines; my @ass; foreach my $line (@lines) { my @bits = split /\s+/, $line; my $num = $bits[0]; my $shift = $bits[1]; my $name = $bits[3]; my $resno = $bits[4]; next if $resno <= 0 ; my $residue = $seq->[$resno] . $resno; push @ass, {atom=>$name,'shift'=>$shift,xeasy_id=>$num,residue=>$residue}; } return \@ass; } sub xeasy::readpeaks # read xeasy peaktable given an assignment table { my ($file,$ass) = @_; my @lines = util::readfile($file); use File::Basename; my $spectrum = basename($file); my $first = $lines[0]; @lines = grep !/^#/, @lines; @lines = grep !/^\s*$/, @lines; #index assignment table using resonance id my %ass; for my $res (@$ass) { my $id = $res->{xeasy_id}; $ass{$id} = $res; } $first =~ /(\d+)/; my $dim = $1; my @peaks; for my $line (@lines) { my @bits = split /\s+/, $line; shift @bits; my $peakno = shift @bits; my @shifts; for (my $i=0;$i<$dim;$i++) { push @shifts, shift @bits; } shift @bits; shift @bits; my $vol = shift @bits; shift @bits; shift @bits; shift @bits; my @ass_ref; for (my $i=0;$i<$dim;$i++) { push @ass_ref, shift @bits; } my @ass = map {$ass{$_}} @ass_ref; push @peaks, {'pos'=>\@shifts,ass=>\@ass,intensity=>$vol,spectrum=>$spectrum}; } return \@peaks; } sub xeasy::saveass { my ($ass,$file) = @_; my %res; foreach my $a (@$ass) { push @{$res{$a->{residue}}}, $a; } my @out; push @out, "# generated by $prog\n"; my $atomnum = 1; foreach my $aa (sort {util::cmpaa($a,$b)} keys %res) { my @ambig; foreach my $at (@{$res{$aa}}) { my $atom = $at->{atom}; if ($atom =~ /\|/) { push @ambig, $at; } } my %ambig; foreach my $at (@ambig) { my @atoms = split /\|/,$at->{atom}; my $first = $atoms[0]; $first =~ /^([A-Z]+)(.*)$/; my $prefix = $1; if (length($2) > 1) { $prefix .= substr($2,0,length($2)-1); } push @{$ambig{$prefix}}, $at; } foreach my $group (keys %ambig) { my $items = $ambig{$group}; if (scalar(@$items)>2) { my @atoms = map {$_->{atom}} @$items; app::error("cannot translate ". join(',',@atoms). " to wildcard"); } $items->[0]{atom} = $group . '3'; $items->[1]{atom} = $group . '2'; } my $seqnum; $aa =~ /[A-Z]+(\d+)$/; my $seqnum = $1; if (not defined $seqnum) { app::error('residue '.$aa.' lacks sequence number') } foreach my $at (sort {$a->{atom} cmp $b->{atom}} @{$res{$aa}}) { my $atom = $at->{atom}; my $freq = $at->{'shift'}; $atom =~ s/\#/*/g; my $line = sprintf "%-5s%7.3f%7.3f%6s%4d\n", $atomnum,$freq,0.0,$atom,$seqnum; $atomnum++; push @out, $line; } } util::writefile(\@out,$file); } package pipp; sub pipp::saveass { my ($ass,$file,$ignore) = @_; my %aa; foreach my $reson (@$ass) { my $atom = $reson->{atom}; my $residue = $reson->{residue}; my $lbl_msg = "atom label '$atom' for residue '$residue' ". 'not understood'; if ($atom =~ /[\[\]]/) { my $pippatom = sparky::atom::topipp($atom); app::message("note: translated atom $atom to $pippatom"); $atom = $pippatom; } if (not $atom) { app::error($lbl_msg); } $reson->{atom} = $atom; push @{$aa{$reson->{residue}}}, $reson; } my @aa = bio::protein::sequence::sort(keys %aa); app::error("there are no residues") if scalar(@aa) == 0; my @OUT; push @OUT, "SHIFT_FL_FRMT RES_SIAD\n"; my $first = $aa[0]; my ($junk,$num) = bio::protein::sequence::idres($first); $first = $num; push @OUT, "FIRST_RES_IN_SEQ ", $num, "\n\n"; foreach my $aa (@aa) { my $atoms = $aa{$aa}; my $residue = $aa; $aa = bio::protein::aminoacid::format(-res=>$aa, -style=>'three_letter'); my ($aa,$num) = bio::protein::sequence::idres($aa); push @OUT, "RES_ID ", $num, "\n"; push @OUT, "RES_TYPE ", $aa, "\n"; push @OUT, "SPIN_SYSTEM_ID ", $num, "\n"; # not all pipp data readers handle this input for some reason # for example aria2 doesn't #print F "HETEROGENEITY 100\n"; foreach my $atom (@$atoms) { my $shift = $atom->{'shift'}; my $type; if ($ignore == 'ignore') { $type = $atom->{atom}; } else { $type = bio::protein::aminoacid::atom::translate( -from=>'iupac', -to=>'xplor', -atom=>$atom->{atom}, -aminoacid=>$residue ); } push @OUT, sprintf(" %s %-7.2f\n",$type,$shift); } push @OUT, "END_RES_DEF\n\n"; } util::writefile(\@OUT,$file); } sub pipp::readass # read pipp resonance list file and return reference to an array # containing resonance information { my ($file,$nom) = @_; $nom = 'xplor' if not defined $nom; my @lines = util::readfile($file); my $i; my @resonances; SOURCE: while (1) { $i++; my ($res_num, $res_type, $shift, $junk); skiptopar(\@lines, 'RES_ID', \$res_num) or last; my $tmp; skiptopar(\@lines, 'RES_TYPE', \$tmp) or last; if ($nom eq 'none') { $res_type = $tmp; } else { $res_type = $AA{$tmp} or die "$i don't have one letter code for $tmp $res_num found in $file"; } while (1) { util::array::ff(\@lines, '^\s*\S+\s+[-]?\d+(.\d+)?\s*$') or last SOURCE; last if $lines[0] !~ /SPIN_SYSTEM_ID/; shift @lines; } while ($lines[0] !~ /^END_RES_DEF/) { my $line = util::line::peel(shift @lines); $junk = $line; next if $line =~ /HETEROGENEITY/; my ($atom,$shift); if ($line =~ /\s+/) { ($atom,$shift) = split(/\s+/,$line); } else { next; } ## todo very important here # like in sparky::readass # i commented out some lines that deal with ambiguous assignments # i think it was incorrect before #my @atoms; #if ($atom =~ /\|/) #{@atoms = split(/\|/,$atom) if $atom =~ /\|/} #else #{push @atoms, $atom}; #foreach my $atom (@atoms) #{ # next if not defined $shift; my %resonance; $resonance{residue} = $res_type . $res_num; $resonance{atom} = $atom; $resonance{'shift'} = $shift; push @resonances, \%resonance; #} } } nmr::ass::translate(\@resonances,$nom,'iupac') if $nom ne 'none'; return \@resonances; } sub pipp::writepeaks { my ($file,$peaks) = @_; my $format = 'FORMAT %4d %3d'; my $dim = scalar(@{$peaks->[0]{'pos'}}); for (my $i=0;$i<$dim;$i++) { $format .= ' %7.2f'; } $format .=' %+8.2e %s %s'."\n"; my $line2 = "VARS PkID Sl.Z"; my @dims = qw(X Y Z A); for (my $i=0;$i<$dim;$i++) { $line2.= ' '.$dims[$i]; } $line2 .= ' Intensity Assign1 Assign2'."\n"; my @out; push @out, 'DIMCOUNT '. $dim. "\n"; push @out, $format . $line2; $format =~ s/FORMAT/ /; $format =~ s/\s+/ /g; $format .= "\n"; for my $peak (@$peaks) { my $ass = $peak->{ass}; my $shifts = $peak->{pos}; my $int = $peak->{'intensity'}; my $id = $peak->{id}; my @shifts; my @labels; for (my $i=0; $i[$i]{atom}; push @shifts, $shifts->[$i]; my $label = $ass->[$i]{residue}.'-'.$atom; push @labels, $label; } push @out, sprintf($format, $id,0, @shifts, $int, @labels); } util::writefile(\@out,$file); } sub pipp::readpeaks { #FORMAT %4d %3d %7.2f %7.2f %7.2f %+8.2e %s %s %5.2f #VARS PkID Sl.Z X Y Z Intensity Assign1 Assign2 MinDis # 1093 1 3.14 4.14 55.89 -7.75e+05 **** **** **** # 1096 1 2.41 1.53 55.83 -2.40e+05 **** **** **** my $file = shift; my $spectrum = util::filebasename($file); my @lines = util::readfile($file); util::array::ff(\@lines,'^FORMAT'); my $format = shift @lines; shift @lines; app::error("peaklist $file has no peaks") if scalar(@lines) == 0; chomp $format; my @bits = split(/\s+/,$format); my $dim = scalar(grep /\%7\.2f/, @bits); util::assert(sub{$dim > 0}, "cannot recognize format string in file $file, ". '%7.2f format is expected for chemical shift columns'); my @peaks; # todo here this whole line parsing is kind of dangerous with split # IMPORTANT # since some fields may be empty in the file foreach my $line (@lines) { my $line = util::line::peel($line); my @bits = split /\s+/, $line; my $id = shift @bits; # todo maybe actually remember slice number shift @bits; my @pos; for (my $i=0;$i<$dim;$i++) { $pos[$i] = shift @bits; } my $intensity = shift @bits; my %peak; $peak{'pos'} = \@pos; $peak{'intensity'} = $intensity; $peak{'spectrum'} = $spectrum; $peak{'id'} = $id; push @peaks, \%peak; # todo here chew in the assignment as well } return \@peaks; } # skip lines (and discard them on the way) until certain line # starting with $par and matching pattern used in the subroutine # set parameter value by reference # return 0 if there is no more lines and 1 otherwise sub skiptopar { my ($lines, $par, $storage) = @_; while (@$lines && $lines->[0] !~ /^$par\s+/) {shift @$lines;} return 0 if !@$lines; $lines->[0] =~ /^$par\s+([0-9A-Z]+)$/; ${$storage} = $1; shift @$lines; return 1; } package toolkit::internal; package nmr; sub nmr::gamma { my $nuc = shift; #numbers taken from Evans, "Biomolecular NMR spectroscopy" 1995, table on p. 7 #units are 10^7 rad/(s*T) my %gamma = ( '1H'=>26.7522, '13C'=>6.7283, '15N'=>-2.7126, '19F'=>25.1816, '13P'=>10.8394); my @nuclei = keys %gamma; if (util::array::isin($nuc,\@nuclei)) { return $gamma{$nuc}; } else { die "internal, don't know gamma for nucleus $nuc"; } } sub nmr::noe::restraint::violation { my ($rest,$dist) = @_; my $rdist = $rest->{dist}; my $rdplus = $rest->{dplus}; my $rdminus = $rest->{dminus}; my $max = $rdist + $rdplus; my $min = $rdist - $rdminus; if ($dist > $max) { return $dist - $max; } elsif ($dist < $min) { return $dist - $min; } return 0; } sub nmr::ass::translate { my ($ass, $from, $to) = @_; my %nom = bio::protein::aminoacid::atom::nomenclature(); foreach my $res (@$ass) { my $atom = $res->{atom}; #my ($aa,$num) = bio::protein::sequence::idres($res->{residue}); $res->{atom} = bio::protein::aminoacid::atom::translate( -atom=>$atom, -aminoacid=>$res->{residue}, -from=>$from, -to=>$to, -table=>\%nom); } return; } sub nmr::readass { my ($file, $fmt, $nom, $seq) = @_; my $ass; if ($fmt eq 'cppn'){ $ass = cppn::readass($file); } elsif ($fmt eq 'sparky') { $ass = sparky::readass($file,'strict',$nom); } elsif ($fmt eq 'pipp') { $ass = pipp::readass($file,$nom); } elsif ($fmt eq 'xeasy' or $fmt eq 'cara') { die "nmr::readass for 'xeasy' format needs sequence" if not defined $seq; $seq = xeasy::readseq($seq); $ass = xeasy::readass($file,$seq,$nom); #for now assume that 'xeasy' follows iupac atom nomenclature if ($nom eq 'cara') { nmr::ass::translate($ass,'cara','iupac'); } } else { app::error("format '$fmt' not understood"); } return $ass; } sub nmr::resetresidues #start lowest numbered residue with 1 wihtin the ass table and #renumber others accordingly { my $ass = shift; my %nums; for my $atom (@$ass) { my ($aa,$num) = bio::protein::sequence::idres($atom->{residue}); $nums{$num} = 1; } my @nums = sort {$a<=>$b} keys %nums; my $first = $nums[0]; for my $atom (@$ass) { my ($aa,$num) = bio::protein::sequence::idres($atom->{residue}); $num -= $first; $atom->{residue} = $aa.$num; } return $ass } sub nmr::readpeaks { my ($file, $fmt, $nom) = @_; if ($fmt eq 'sparky') { return sparky::readpeaks($file,$nom); } elsif ($fmt eq 'pipp') { return pipp::readpeaks($file,$nom); } else { app::error("format '$fmt' not understood"); } } sub nmr::writeass { my ($ass,$file,$fmt) = @_; if ($fmt eq 'sparky') { sparky::saveass($ass,$file); } elsif ($fmt eq 'pipp') { pipp::saveass($ass,$file); } elsif ($fmt eq 'xeasy') { xeasy::saveass($ass,$file); } else { app::error("format '$fmt' not understood"); } } package nmr::aria; sub nmr::aria::noe::parse # parse aria and xplor noe restraint # into a data structure # %rest: # peakno=>$PEAKNO, # specno=>$SPECNO, # dist=>$DIST, # dminus=>$DMINUS, # dplus=>$DPLUS, # ppm1=>$PPM1, # ppm2=>$PPM2, # volume=>$VOLUME, # @ass : # {atom1=>@atom1,atom2=>@atom2} # @atom#: just like atom in assignment table that readass produces { my ($input, $seq) = @_; #todo maybe finish this } sub nmr::aria::restraint::parsenoe { my ($input,$seq) = @_; my @bits; foreach (@$input) { $_ = util::line::toupper($_); $_ = util::line::peel($_); push @bits, split(/\s+/,$_); } my @tok; foreach my $token (@bits) { while (length($token) > 0) { if ($token =~ /^[{}()]/) { $token =~ s/^([{}()])(.*)$/$2/; push @tok, $1; } else { $token =~ s/^([^{}()]+)(.*)$/$2/; push @tok, $1; } } } my $errmsg = 'premature termination of restraint record'; my $token = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$token =~ /^ASSI/}, 'noe assignment must start with ASSI(gn) word'); my $token = util::array::squeeze(\@tok,$errmsg); my $PEAKNO; if ($token eq '{') { $PEAKNO = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$PEAKNO =~ /^\d+/}, "number expected for peak # $PEAKNO found"); my $next = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$next eq '}'},"'}' expected after peak number"); } my ($SEGID1,$RESNO1,$ATOM1); my $sel = xplor::grabsel(\@tok,$errmsg); util::assert(sub{scalar(@tok) > 0},'incomplete assign statement: '. 'only one ATOM selection read'); ($SEGID1,$RESNO1,$ATOM1) = xplor::parsesel($sel); my $sel2 = xplor::grabsel(\@tok,$errmsg); util::assert(sub{scalar(@tok) > 0},'incomplete assign statement: '. 'two ATOM selections read but no restraint distances found'); my ($SEGID2,$RESNO2,$ATOM2) = xplor::parsesel($sel2); my $DIST = util::array::squeeze(\@tok,$errmsg); my $DPLUS = util::array::squeeze(\@tok,$errmsg); my $DMINUS = util::array::squeeze(\@tok,$errmsg); util::assert(sub{util::scalar::isnum($DIST)}, "number exected for restraint distance $DIST found"); util::assert(sub{util::scalar::isnum($DPLUS)}, "number exected for positive violation tolerance $DPLUS found"); util::assert(sub{util::scalar::isnum($DMINUS)}, "number exected for negative violation tolerance $DMINUS found"); my $tmp = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$tmp eq 'PEAK'},"word PEAK expected, $tmp found"); if (defined ($PEAKNO)) { my $tmp = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$PEAKNO eq $tmp}, "peak number in { ...} and after word PEAK are different"); } else { $PEAKNO = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$PEAKNO =~ /^\d+$/}, "number expected for peak # $PEAKNO found"); } $tmp = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$tmp eq 'SPECTRUM'}, "'SPECTRUM' token expected $tmp found"); my $SPECNO = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$SPECNO =~ /^\d+$/},"number expected for spectrum #, $tmp found"); util::array::squeeze(\@tok,$errmsg); util::array::squeeze(\@tok,$errmsg); $tmp = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$tmp eq 'VOLUME'}, "'VOLUME' token expected $tmp found"); my $VOLUME = util::array::squeeze(\@tok,$errmsg); util::assert(sub{util::scalar::isnum($VOLUME)}, "number expected for peak volume, $VOLUME found"); $tmp = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$tmp eq 'PPM1'}, "'PPM2' token expected $tmp found"); my $PPM1 = util::array::squeeze(\@tok,$errmsg); util::assert(sub{util::scalar::isnum($PPM1)}, "number expected for first chemical shift $PPM1 found"); $tmp = util::array::squeeze(\@tok,$errmsg); util::assert(sub{$tmp eq 'PPM2'}, "'PPM2' token expected $tmp found"); my $PPM2 = util::array::squeeze(\@tok,$errmsg); util::assert(sub{util::scalar::isnum($PPM2)}, "number expected for second chemical shift $PPM2 found"); # %rest = (peakno,specno,prot1,het1,prot2,het2,dist,dminus,dplus,aria) my %rest = ( peakno=>$PEAKNO, specno=>$SPECNO, dist=>$DIST, dminus=>$DMINUS, dplus=>$DPLUS, ppm1=>$PPM1, ppm2=>$PPM2, volume=>$VOLUME, resid1=>$RESNO1, atom1=>$ATOM1, segid1=>$SEGID1, resid2=>$RESNO2, atom2=>$ATOM2, segid2=>$SEGID2 ); return \%rest; } package xplor; # package for printing and reading xplor statements # and processing other xplor and xplor format related # data sub xplor::noe::read #read and parse distance restraint files { my $files = shift;#reference to array of distance restraint files or one file as scalar my $tbl = util::array::insure($files); my @REST; foreach my $file (@$files) { my @lines = util::readfile($tbl); @lines = grep {!/^\!/ and !/^\s*$/} @lines; while (@lines) { my $line = shift(@lines); if ($line =~ /ASSI/i) { # @lines has restraint lines in it # $res1 and $res2 are intervals #1 and #2 # each interval might be either one residue #[.] # or interval # my $rest = xplor::parse_rest(\@lines,$line); push @REST, $rest; } } } return \@REST; } sub xplor::hnha::read #read and parse hnha J coupling restraint files { my $files = shift;#reference to array of distance restraint files or one file as scalar my $tbl = util::array::insure($files); my @REST; foreach my $file (@$files) { my @lines = util::readfile($tbl); @lines = grep {!/^\!/ and !/^\s*$/} @lines; while (@lines) { my $line = shift(@lines); if ($line =~ /ASSI/i) { # @lines has restraint lines in it # $res1 and $res2 are intervals #1 and #2 # each interval might be either one residue #[.] # or interval # my $rest = xplor::parse_hnha_rest(\@lines,$line); push @REST, $rest; } } } return \@REST; } sub xplor::restraint_is_valid #if selections are valid and distances are defined and upper limit > lower limit #and both limits are positive returns 1, otherwise returns 0 #$msg must be reference to error message { my ($rest,$msg) = @_; my $dist = $rest->{dist}; my $dplus = $rest->{dplus}; my $dminus = $rest->{dminus}; my $ret = 1; my @err; if (not defined $dist) { $ret = 0; push @err, 'distance not defined'; } if (not defined $dplus) { $ret = 0; push @err, 'dplus not defined'; } if (not defined $dminus) { $ret = 0; push @err, 'dminus not defined'; } my $upl = $dist + $dplus; my $lol = $dist - $dminus; if ($upl < $lol) { $ret = 0; push @err, 'upper limit is less then lower limit'; } if ($upl < 0) { $ret = 0; push @err, 'upper limit is negative'; } if ($lol < 0) { $ret = 0; push @err, 'lower limit is negative'; } my $pairs = $rest->{selections}; if (not defined $pairs) { $ret = 0; push @err, 'entire selection statement could not be parsed'; } foreach my $pair (@$pairs) { if (not defined $pair) { $ret = 0; push @err, 'one of the alternative selections could not be parsed'; } else { my $ngroups = scalar @$pair; if (scalar $ngroups != 2) { $ret = 0; push @err, 'only one or more group(s) of two selections are '. 'allowed for distance restraints, '."$ngroups found"; } else { foreach my $group (@$pair) { if (not defined $group) { $ret = 0; push @err, 'a group of atom selections could not be parsed'; } if (scalar(@$group)==0) { $ret = 0; push @err, 'no atoms found in selection'; } foreach my $atom (@$group) { if (not defined $atom) { $ret = 0; push @err, 'atom selection could not be parsed'; } else { if ($atom->[0] !~ /^\d+$/) { $ret = 0; push @err, "noninteger residue number $atom->[0] found"; } if (not defined $atom->[1]) { $ret = 0; push @err, "could not determine atom name for ". "residue $atom->[0]"; } } } } } } } $$msg = join(", ",@{util::array::uniq(\@err)}); return $ret; } sub xplor::ambiguate_restraint_debug { my ($rest,$lib,$atom_table,$seq) = @_; #$lib is ambiguation table #$atom_table - just list of atoms by residue for faster globbing #$rest - parsed restraint data structure my $pairs = $rest->{selections}; my %ambig_rest = %$rest; my @ambig_pairs; foreach my $pair (@$pairs) { my @ambig_pair; for (my $i=0;$i<2;$i++) { my $sel_group = $pair->[$i]; my %atoms; #unroll wildcards and then take unique foreach my $atom (@$sel_group) { my $n = $atom->[0]; my $residue = $seq->[$n]; my $atom = $atom->[1]; my %glob = (residue=>$residue,atom=>$atom); if (not defined $residue) { app::error("no residue found for aminoacid number $n, is the sequence correct?"); } my $globbed = bio::aminoacid::globatoms(\%glob,'xplor',$atom_table); util::print(%glob); foreach my $g (@$globbed) { my $atom_name = $g->{atom}; my $related_atoms = $lib->{$residue}{$atom_name}; foreach my $related_atom (@$related_atoms) { my $xplor_atom = [$n,$related_atom]; my $key = join('@',@$xplor_atom); $atoms{$key} = $xplor_atom; } } } push @ambig_pair, [values %atoms]; } push @ambig_pairs,\@ambig_pair; } util::print(@ambig_pairs); $ambig_rest{selections} = \@ambig_pairs; return \%ambig_rest; } sub xplor::ambiguate_restraint { my ($rest,$lib,$atom_table,$seq) = @_; #$lib is ambiguation table #$atom_table - just list of atoms by residue for faster globbing #$rest - parsed restraint data structure my $pairs = $rest->{selections}; my %ambig_rest = %$rest; my @ambig_pairs; foreach my $pair (@$pairs) { my @ambig_pair; for (my $i=0;$i<2;$i++) { my $sel_group = $pair->[$i]; my %atoms; #unroll wildcards and then take unique foreach my $atom (@$sel_group) { my $n = $atom->[0]; my $residue = $seq->[$n]; my $atom = $atom->[1]; my %glob = (residue=>$residue,atom=>$atom); if (not defined $residue) { app::error("no residue found for aminoacid number $n, is the sequence correct?"); } my $globbed = bio::aminoacid::globatoms(\%glob,'xplor',$atom_table); foreach my $g (@$globbed) { my $atom_name = $g->{atom}; my $related_atoms = $lib->{$residue}{$atom_name}; foreach my $related_atom (@$related_atoms) { my $xplor_atom = [$n,$related_atom]; my $key = join('@',@$xplor_atom); $atoms{$key} = $xplor_atom; } } } push @ambig_pair, [values %atoms]; } push @ambig_pairs,\@ambig_pair; } $ambig_rest{selections} = \@ambig_pairs; return \%ambig_rest; } sub xplor::gen_restraint_key #generate a unique key for a given restraint based on selection { my $rest = shift; my $pairs = $rest->{selections}; my @bits; foreach my $pair (@$pairs) { my @sel; foreach my $group (@$pair) { my @atoms; foreach my $atom (@$group) { push @atoms, join('#atom#',@$atom); } push @sel, join('#group#',sort @atoms); } my $bit = join('#sel#',sort @sel); push @bits, $bit; } my $key = join('#pairs#',sort @bits); return $key; } sub xplor::get_upper_limit { my $rest = shift; my $uplim = $rest->{dist} + $rest->{dplus}; return $uplim; } sub xplor::print_restraint { my $rest = shift; print xplor::noe::string($rest); } sub xplor::noe::write { my ($rest,$file) = @_; my @lines; return if scalar(@$rest) == 0; foreach my $r (@$rest) { push @lines, xplor::noe::string($r); } $file =~ s/\s+/-/g; util::writefile(\@lines,$file,'force'); } sub xplor::noe::text { my ($rest,$seq) = @_; my $out = 'assign '; my $selections = $rest->{selections}; my $dist = $rest->{dist}; my $dplus = $rest->{dplus}; my $dminus = $rest->{dminus}; my @pair_tokens; foreach my $pair (@$selections) { my @group_tokens; foreach my $group (@$pair) { my @atom_tokens; foreach my $atom (@$group) { my $num = $atom->[0]; my $residue = $seq->[$num]; my $name = $atom->[1]; push @atom_tokens, "$residue" . $num . '.' . "$name"; } push @group_tokens, join(" or ",@atom_tokens); } push @pair_tokens, join("\t",@group_tokens); } my @lines; my $lo = sprintf "%.2f", $dist - $dminus; my $hi = sprintf "%.2f", $dist + $dplus; push @lines, $pair_tokens[0] . "\t$lo .. $hi\n"; shift @pair_tokens; foreach my $pair_token (@pair_tokens) { push @lines, $pair_token . "\t\n"; } return join('',@lines); } sub xplor::comment { #wrap line into 32 character long pieces each starting with a ! character my $line = shift; return util::line::wrap(-max=>132,-line=>$line,-indent_prefix=>'! '); } sub xplor::noe::string { my $rest = shift; my $out = "assign\n"; my $selections = $rest->{selections}; my $dist = $rest->{dist}; my $dplus = $rest->{dplus}; my $dminus = $rest->{dminus}; my $comments = $rest->{comments}; my $file = $rest->{file}; my @pair_tokens; my $file_comment = xplor::comment("Source file: $file"); $out .= "$file_comment\n"; if (defined $comments) { foreach my $comment (@$comments) { $comment =~ s/^!*//; chomp $comment; $comment = util::line::peel($comment); next if $comment eq $file_comment; $out .= xplor::comment($comment) . "\n"; } } $out .= "\t"; foreach my $pair (@$selections) { my @group_tokens; foreach my $group (@$pair) { my @atom_tokens; foreach my $atom (@$group) { my $num = $atom->[0]; my $name = $atom->[1]; push @atom_tokens, '('."residue $num and name $name" .')'; } push @group_tokens, '('. join(" or \n\t",@atom_tokens) . ")"; } push @pair_tokens, join("\n\t",@group_tokens); } if (scalar @pair_tokens > 1) { @pair_tokens = map {'('.$_.')'} @pair_tokens; } $out .= join("\n\tOR ",@pair_tokens); $out .= sprintf "%5.2f%5.2f%5.2f\n", $dist, $dminus, $dplus; return $out; } sub xplor::grab_selection #take out a first occuring set of parenthesized substring working from the begginning #to the end of the input string #input string is given by reference { my $input = shift; #print $$input, "\n\n"; my $orig = $$input; my $out; #here i assume that first line starts with opening parenthesis #die "premature end of file" if scalar(@$input) == 0; if ($$input !~ /^$/) { die "selection must start with a '(': '$$input'"; } my $paren = 0; my @C = split /|/, $$input; my $sel = ''; while (@C) { my $char = shift @C; $sel .= $char; if ($char eq '(') { $paren++; } elsif ($char eq ')') { $paren--; } #if ($paren<0) { die "too many right parentheses in $line"; } if ($paren == 0) { my $tail = join('',@C); $tail =~ s/^\s+//; $$input = $tail; return $sel; } } if ($orig eq $$input) { #xplor::grab_selection failed die "could not find selection in '$orig'"; } return $out; } sub xplor::parse_hnha_rest { my ($input,$line) = @_; # prepare output my @lines; push @lines, $line; for (my $i=0; $i[$i] !~ /ASSI/i; $i++) { push @lines, $input->[$i]; } my @comments; foreach (@lines) { chomp; if ($_ =~ /(!.*)$/) { push @comments, $1; } s/^([^!]*)!.*$/\1/; s/\(/ ( /g; s/$/ ) /g; s/\s+/ /g; s/\{.*\}//g;#remove peak label from aria output } my $restraint = join(' ',@lines); $restraint =~ s/^\s*//; $restraint =~ s/assi(g|gn)?\s+//i; my @SEL; my $sel1 = xplor::parse_selection(xplor::grab_selection(\$restraint)); my $sel2 = xplor::parse_selection(xplor::grab_selection(\$restraint)); my $sel3 = xplor::parse_selection(xplor::grab_selection(\$restraint)); my $sel4 = xplor::parse_selection(xplor::grab_selection(\$restraint)); #validate that selections actually correspond to hnha push @SEL, [$sel1,$sel2,$sel3,$sel4]; my $coupling = xplor::grab_number(\$restraint); my $error = xplor::grab_number(\$restraint); my %RESTRAINT; $RESTRAINT{'coupling'} = $coupling; $RESTRAINT{'error'} = $error; $RESTRAINT{'comments'} = \@comments; $RESTRAINT{'selections'} = \@SEL; return \%RESTRAINT; } sub xplor::parse_rest { my ($input,$line) = @_; # prepare output my @lines; push @lines, $line; for (my $i=0; $i[$i] !~ /ASSI/i; $i++) { push @lines, $input->[$i]; } my @comments; foreach (@lines) { chomp; if ($_ =~ /(!.*)$/) { push @comments, $1; } s/^([^!]*)!.*$/\1/; s/$/ ( /g; s/$/ ) /g; s/\s+/ /g; s/\{.*\}//g;#remove peak label from aria output } my $restraint = join(' ',@lines); $restraint =~ s/^\s*//; $restraint =~ s/assi(g|gn)?\s+//i; my @SEL; my $sel1 = xplor::grab_selection(\$restraint); my $sel2 = xplor::grab_selection(\$restraint); push @SEL, [$sel1,$sel2]; my ($dist,$dplus,$dminus) = xplor::grab_dist(\$restraint); my $aria_stuff = xplor::grab_aria(\$restraint); my %RESTRAINT; $RESTRAINT{'dist'} = $dist; $RESTRAINT{'dplus'} = $dplus; $RESTRAINT{'dminus'} = $dminus; $RESTRAINT{'aria'} = $aria_stuff; $RESTRAINT{'comments'} = \@comments; $restraint =~ s/\s+$//;#remove trailing spaces just in case while (length($restraint) > 0) { my $sel1 = xplor::grab_selection(\$restraint); my $sel2 = xplor::grab_selection(\$restraint); push @SEL, [$sel1,$sel2]; xplor::grab_aria(\$restraint);#chomp off the OR } #parsing diagnostics #foreach my $sel (@SEL) #{ # print '####',"\n"; # print $sel->[0],"\n"; # print $sel->[1],"\n"; #} #print join(' * ',($dist,$dplus,$dminus)),"\n"; #print $aria_stuff,"\n"; #print $restraint,"<-this is what's left of the line\n"; #print '--------------------------',"\n"; #return; my @PARSED_SEL; for my $sel (@SEL) { #print $sel->[0],"\n",$sel->[1],"\n"; my $sel1 = xplor::parse_selection($sel->[0]); my $sel2 = xplor::parse_selection($sel->[1]); push @PARSED_SEL, [$sel1,$sel2]; } $RESTRAINT{'selections'} = \@PARSED_SEL; return \%RESTRAINT; } sub xplor::grab_pattern #replace pattern $rg in $$input with nothing #return $1 from $rg or '' if there is no match #make sure that there is zero or one occurance of $rg match in $$input #otherwise die and say something like invalid $label statement { my ($input,$rg,$label) = @_; my $orig = $input; #print "trying to match '$rg' on '$$input' ... "; if ($$input =~ s/$rg//i) { #print 'matched',"\n"; xplor::remove_and($input); my $match = $1; #print $$orig, "\t", $$input; if (defined xplor::grab_pattern($input,$rg,$label)) { die "could not parse: $$orig, too many $label keywords per selection"; } xplor::remove_and($input); #print "result '$match'\n"; return $match; } #print "failed\n"; return undef; } sub xplor::remove_and { my $input = shift; $$input =~ s/^\s+//; $$input =~ s/\s+$//; $$input =~ s/^and//i; $$input =~ s/and$//i; $$input =~ s/^\s+//; $$input =~ s/\s+$//; } sub xplor::split_selection #split xplor selection expression into 'or'ed 'irreducible' selections #that identify residue and atom (or several atoms) on that residue { my $input = shift; my @out;#where resulting selection subexpressions are to be put while (length($input) > 0) { my $new_sel = xplor::grab_selection(\$input); $new_sel =~ s/^$(.*)$$/$1/;#take it out of parentheses $new_sel =~ s/^\s+//; $new_sel =~ s/\s+$//; $input =~ s/^\s+//; $input =~ s/\s+$//; $input =~ s/^or\s+//i; $input =~ s/\s+or$//i; #check new_selection if ($new_sel =~ /^\(/ and $new_sel !~ /^$NAMES_RG/i) { push @out, xplor::split_selection($new_sel); } else { push @out,$new_sel; } } return @out; } sub xplor::parse_selection { my $INPUT = shift; my $orig = $INPUT; my @SEL;#array for parsed selection (residue#/atom pairs) if ($INPUT =~ /^\(/)#selection must start with a '(' { my @unparsed_selections = xplor::split_selection($INPUT); my $name_rg = $NAME_RG; my $names_rg = $NAMES_RG; my $segid_rg = 'segid(?:e|en|enti|entif|entifi|entifie|entifier)?\s+\"([^"]+)\"'; my $resid_rg = 'resi(?:d|du|due)?\s+(\d+)'; foreach my $input (@unparsed_selections) { if ($input =~ /^($names_rg|segid|resi|name)/i) { #hack $input into 'names' or 'name', 'segid' and 'resid' #after that there should be nothing left of the input #print $orig,"\n",$input,"\n"; #print $name_rg,"<-\n"; my $names = xplor::grab_pattern(\$input,$names_rg,'ambiguous name'); #print $names, "<-\n"; my $name = xplor::grab_pattern(\$input,$name_rg,'name'); if (defined $names and defined $name) { die "unexpected selection: $orig\ncould not parse out atom names"; } $names = defined $names ? $names:$name; if (not defined $names) { die "could not parse atom names in: $orig"; } #print $names, "<-\n"; my $names = xplor::parse_names($names); my $segid = xplor::grab_pattern(\$input,$segid_rg,'segid');#segid is not used for now #however it can be incorporated into @SEL as additional element my $resid = xplor::grab_pattern(\$input,$resid_rg,'resi'); #print "selection: $orig\n"; #print "atoms: ",join(',',@$names),"\n"; #print "segid: ",$segid,"\n"; #print "resi: $resid\n"; #print '------------------',"\n"; foreach my $name (@$names) { push @SEL, [$resid,$name]; } if (length($input) > 0) { die "could not entirely parse selection: $orig\n". "remainder: '$input'"; } } else { die "invalid or unsupported format selection: '$input'"; } } } else { die "invalid selection expression: $INPUT"; } return \@SEL; } sub xplor::parse_name { my $input = shift; $input =~ s/name\s+//i; return $input; } sub xplor::parse_names { my $input = shift; my @names; if ($input =~ /^$NAMES_RG$/i) { $input =~ s/^$//; $input =~ s/$$//; $input =~ s/^\s+//; $input =~ s/\s+$//; @names = map {xplor::parse_name($_)} split(/\s+or\s+/i, $input); #print "parsing names: $input -> ", join(',',@names),"\n"; } elsif ($input =~ /^$NAME_RG$/i) { push @names, xplor::parse_name($input); } return \@names; } sub xplor::grab_number { my $input = shift; my $numregex = '(?:(?:[1-9]\d*(?:\.\d+)?)|(?:0?\.\d+)|0)'; if ($$input =~ s/($numregex)(.*)$/$2/) { my $number = $1; $$input =~ s/^\s+//; return $number; } else { die "a number expected in the beginning on this: ",$$input; } } sub xplor::grab_dist { my $input = shift; #print $$input,"<-\n\n"; my $numregex = '(?:(?:[1-9]\d*(?:\.\d+)?)|(?:0?\.\d+)|0)'; if ($$input =~ s/($numregex)\s+($numregex)\s+($numregex)(.*)$/$4/) { my ($dist,$dminus,$dplus) = ($1,$2,$3); $$input =~ s/^\s+//; return ($dist,$dplus,$dminus); } else { die "three numbers expected in beginning on this: ",$$input; } } sub xplor::grab_aria { my $input = shift; if ($$input =~ /^or\s(.*)$/i) { $$input = $1; $$input =~ s/^\s+//; return ''; } elsif ($$input =~ /^(.*?)\sor\s(.*)$/i) { my $aria = $1; my $tail = $2; $$input = $tail; $$input =~ s/^\s+//; $aria =~ s/^s+//; $aria =~ s/s+$//; return $aria; } return ''; } sub xplor::read_range # read range as provided in command line arguments and form two # element array from it { my $in = shift; my @range; if ($in =~ /^\d+(\.[A-Z]{1,2}\d{0,2}[#%*]?)?$/i) # one residue matching make fake two element array { @range = ($in,$in); } elsif ($in =~ /^\d+\-\d+$/) # two residue matching # produce sorted in ascending order array with residue numbers { @range = sort {$a<=>$b} split( /-/, $in); } else { die "incorrect range $in"; } return @range; } sub xplor::wk_to_regex { my $wk = shift; if ($wk =~ /^(.*)\%$/) { return "^$1.\$"; } if ($wk =~ /^(.*)\[#*]$/) { return "^$1.*\$"; } return $wk; } sub xplor::inrange { #what is residue number #atom is atom name #where is range my ($what,$atom,$where) = @_; #print "$what $atom ", @$where, "\n"; my ($num1,$atom1) = xplor::parse_def($where->[0]); my ($num2,$atom2) = xplor::parse_def($where->[1]); if ($what >= $num1 and $what <= $num2) { if (defined $atom1) { my $regex = xplor::wk_to_regex($atom1); if ($atom =~ /$regex/) { return 1; } return 0; } elsif (defined $atom2) { my $regex = xplor::wk_to_regex($atom2); if ($atom =~ /$regex/i) { return 1; } return 0; } else { return 1; } } return 0; } sub xplor::parse_def { my $what = shift; $what =~ /^(\d+)(\.[A-Z]{1,2}\d{0,2}[#%]?)?$/i; my $num = $1; my $atom = $2; $atom =~ s/^\.//; return ($num,$atom); } sub xplor::interval_inrange { my ($ar1,$SEL1,$SEL2) = @_; return 0 if scalar(@$ar1) != 2; return 0 if scalar(@{$SEL1->[0]}) != 2; return 0 if scalar(@{$SEL2->[0]}) != 2; my @range1 = xplor::read_range($ar1->[0]); my @range2 = xplor::read_range($ar1->[1]); foreach my $sel1 (@$SEL1) { foreach my $sel2 (@$SEL2) { #print join(',',@$sel1),"\n"; #print join(',',@$sel2),"\n"; # try matching atoms to ranges in two ways return 1 if (xplor::inrange($sel1->[0],$sel1->[1],\@range1) and xplor::inrange($sel2->[0],$sel2->[1],\@range2)); return 1 if (xplor::inrange($sel1->[0],$sel1->[1],\@range2) and xplor::inrange($sel2->[0],$sel2->[1],\@range1)); } } # returning zero if matching in both ways failed return 0; } sub xplor::grabsel { my ($in,$errmsg) = @_; my $parendepth = 1; my @sel; my $token = util::array::squeeze($in,$errmsg); if ($token ne '(') { app::error("selection expected after ASSIgn token ". "or { PEAKNO } clause $token found"); } push @sel, $token; while ($parendepth > 0 and scalar(@$in) > 0) { my $token = shift @$in; if ($token eq '(') { $parendepth++; } elsif ($token eq ')') { $parendepth--; } push @sel, $token; } return \@sel; } sub xplor::print # print restraint in xplor format { my $rest = shift; my $upl = $rest->{upl}; my $gr1 = $rest->{atom_group1}; my $gr2 = $rest->{atom_group2}; my $comments = $rest->{comments}; print "assign\n"; if (defined $comments) { foreach my $comment (@$comments) { print "! $comment\n"; } } print "(\n"; print join(" or \n", map {"\t( resid $_->{resno} and name $_->{atom} )"} @$gr1), "\n"; print ")\n"; print "(\n"; print join(" or \n", map {"\t( resid $_->{resno} and name $_->{atom} )"} @$gr2), "\n"; print ")\n"; my $vdw = 1.8; printf "%4.2f %4.2f %4.2f\n",$upl,$upl-$vdw,0; print "\n"; return; } sub xplor::parsesel { my $sel = shift; my ($segid,$resno,$atom); while ($sel->[0] eq '(' and $sel->[-1] eq ')') { shift @$sel; pop @$sel; } $segid = ''; while ($sel->[0] !~ /^RESID/ and scalar(@$sel) > 0) { shift @$sel; } if (scalar(@$sel) < 4) { app::error("could not parse xplor atom selection at ". join(' ',@$sel)); } shift @$sel; $resno = shift @$sel; util::assert(sub{util::scalar::isnum($resno)}, "number expected for residue #, $resno found"); my $tmp = shift @$sel; util::assert(sub{$tmp eq 'AND'}, "word 'AND' expected $tmp found"); # todo here add parsing of expression type of name my $tmp = shift @$sel; util::assert(sub{$tmp eq 'NAME'}, "word 'NAME' expected $tmp found"); my $atom = shift @$sel; return ($segid,$resno,$atom); } sub xplor::globatoms { my ($glob,$nolib) = @_; $nolib = 'false' if not defined $nolib; my ($res,$num,$atom); if ($glob =~ /,/) { $glob =~ /^([A-Z]|[A-Z]{3})(\d+)\.(.+\,)(.*)$/; $res = $1; $num = $2; $atom = $4; } else { $glob =~ /^([A-Z]|[A-Z]{3})(\d+)\.(.*)$/; $res = $1; $num = $2; $atom = $3; } #if ($atom !~ /[A-Z]+\d{0,2}[#%]?((\|[A-Z]+\d{0,2}[#%]?)*(\|[A-Z]+\d{0,2}[#%]?))?$/) #atom name pattern if ($nolib eq 'false')#use library validation { my $ap = '[A-Z]+\d{0,2}(?:[\'][#%]?|[\']{0,2}|[#%]?)'; if ($atom !~ /^$ap(\|$ap)*$/) { app::warning("strange atom $atom in label $glob"); return undef; } if (length($res) != 1) { app::warning("strange residue $res in label $glob, ". "only understand one letter aminoacid codes"); return undef; } } my %atom = (atom=>$atom,residue=>$res.$num,resno=>$num); if ($nolib eq 'true') { return [\%atom]; } elsif($nolib eq 'false') { return bio::aminoacid::globatoms(\%atom,'xplor'); } } # a little subroutine that converts HB# xplor syntax # to standard regular expression syntax sub xplor::wk2regex { my $input = shift; if ($input =~ /\|/) { my @opt = split /\|/, $input; my $out = join(')|(',map {xplor::wk2regex($_)} @opt); return "($out)"; } my $R = '[%#.+*]'; my $NR = "[^%#.+*]"; $input =~ /^($NR+)($R)?$/; my $base = $1; my $ext = $2; my $regex; if ($ext eq '%') { $regex = $base . '.'; } elsif ($ext eq '#') { $regex = $base . '\d+'; } elsif ($ext eq '+') { $regex = $base . '\d'; } elsif ($ext eq '*') { $regex = $base . '.*'; } else { $regex = $base; } return $regex; } sub xplor::atomname { my $name = shift; if ($name =~ /\|/) { $name = '(name ' . join("\n\t\t\t\t\tor name ", split('\|',$name)) . ')'; } else { $name = "name $name"; } return $name; } package internal; package internal::noe; sub internal::noe::peak::calcdist { return 5.0; } sub internal::noe::guessSpecByHet { my $het = shift; my $atom = $het->{atom}; my %d = internal::spectrum::defaults::axis(); my $types = $d{atomtypes}; my @specs = keys %$types; my @poss; foreach my $spec (@specs) { my $regex = $types->{$spec}{Y}; push @poss, $spec if $atom =~ /$regex/; } if (scalar(@poss) > 1) { app::message("too many types of spectra match primary ". "heteroatom $atom"); return undef; } elsif (scalar(@poss) == 0) { app::message("none of the types of spectra match ". "primary heteroatom $atom"); return undef; } else { return $poss[0]; } } sub internal::noe::aria { my ($pairs,$sp,$chir) = @_; my $started = 0; my $output; foreach my $pair (@$pairs) { my ($p1,$p2) = @$pair; my $peak1 = $p1->{peak}; my $ass1 = $p1->{ass}; my $spec1 = internal::peak::whatspectrum( $p1->{peak}); my $atom1 = internal::noe::anchorreson($sp, $spec1,$ass1); my $dist1 = internal::noe::peak::calcdist($peak1,$sp); my $peak2 = $p2->{peak}; my $ass2 = $p2->{ass}; my $spec2 = internal::peak::whatspectrum( $p2->{peak}); my $atom2 = internal::noe::anchorreson($sp, $spec2,$ass2); my $dist2 = internal::noe::peak::calcdist($peak2,$sp); my $dist = util::array::max([$dist1,$dist2]); my $peakid = sprintf "%5d", $peak1->{id}; if ($started == 0) { $output = ' ASSI '; } else { $output .= ' OR '; } if (defined $dist1 or defined $dist2) { $output .= "\n"; } else { $output .= "{$peakid}\n" } my $name1 = $atom1->{atom}; my ($residue1,$resnum1) = bio::protein::sequence::idres($atom1->{residue}); my $name2 = $atom2->{atom}; my ($residue2,$resnum2) = bio::protein::sequence::idres($atom2->{residue}); if ($chir eq 'true') { $name1 = bio::protein::aminoacid::atom::scramble($name1,$residue1); $name2 = bio::protein::aminoacid::atom::scramble($name2,$residue2); } # convert | in ambiguous assignment to or statements $name1 = xplor::atomname($name1); $name2 = xplor::atomname($name2); $output .= " (( segid \" \" and resid " . $resnum1 . " and $name1 ))\n"; $output .= " (( segid \" \" and resid " . $resnum2 . " and $name2 ))\n"; if ($started == 0) { my ($dist,$dminus,$dplus); if (defined $dist1 or defined $dist2) { $dist = util::array::max([$dist1,$dist2]); $dminus = $dist - 1.7; $dplus = 0; if ($dminus < 0) { app::warning("pair of symmetry related peaks at ". internal::peak::text::pos($peak1) . ' and '. internal::peak::text::pos($peak2) . ' '. "assigned as $residue1\.$name1; $residue2\.$name2 ". "yielded strange restraint length $dist"); } } else { $dist = 6.0; $dminus = 0.1; $dplus = 0.1; } $output .= sprintf "%12.3f%10.3f%10.3f ", $dist, $dminus, $dplus; $output .= "\n" if (defined $dist1 or defined $dist2); if (not defined $dist1 and not defined $dist2) { my $vol1 = abs($peak1->{integral}); my $vol2 = abs($peak2->{integral}); my $vol = util::array::min([$vol1,$vol2]); # todo important here get actual peak frequency not # ass freqs as below my $ppm1 = $atom1->{'shift'}; my $ppm2 = $atom2->{'shift'}; $output .= "peak $peakid spectrum 1 weight 0.100000E+01 "; $output .= sprintf "volume %12.5e ppm1 %10.3f ", $vol, $ppm1; $output .= sprintf "ppm2 %10.3f CV %6d\n", $ppm2, 1; } $started = 1; } } return $output; # trying to give output like following # ASSI { 64} # (( segid " " and resid 34 and name HA )) # (( segid " " and resid 34 and name HB2 )) # 2.900 1.100 1.100 peak 64 spectrum 1 weight 0.10000E+01 volume 0.20389E-02 ppm1 5.788 ppm2 1.622 CV 1 # OR { 64} # (( segid " " and resid 34 and name HA )) # (( segid " " and resid 34 and name HG3 )) } sub internal::noe::text { my ($p1,$p2,$sp) = @_; my $ass1 = $p1->{ass}; my $ass2 = $p2->{ass}; my $out = internal::peak::text::ass($p1); $out .= "\t"; $out .= internal::peak::text::ass($p2); my $assdev1 = internal::peak::assdev($p1->{peak},$ass1,$sp); my $assdev2 = internal::peak::assdev($p2->{peak},$ass2,$sp); $out .= sprintf "%6.2f", $assdev1; $out .= sprintf "%6.2f", $assdev2; return $out; } sub internal::noe::getdist { my ($peak1,$peak2,$str,$sp) = @_; my $ass1 = $peak1->{ass}; my $spec1 = $peak1->{peak}{spectrum}; my $atomdef1 = internal::noe::anchorreson($sp,$spec1,$ass1); my $atom1 = bio::aminoacid::getambigatoms($atomdef1); my $ass2 = $peak2->{ass}; my $spec2 = $peak2->{peak}{spectrum}; my $atomdef2 = internal::noe::anchorreson($sp,$spec2,$ass2); my $atom2 = bio::aminoacid::getambigatoms($atomdef2); # generate all possible couples from arrays @$atom2 and @$atom2 my $opt = util::array::tuples($atom1,$atom2); my $try = shift(@$opt); my $dist = bio::structure::getmindist($str, $try->[0], $try->[1]); foreach my $try (@$opt) { my $d = bio::structure::getmindist($str, $try->[0], $try->[1]); next if $d == 0; $dist = $d if $d < $dist; } return $dist; } sub internal::noe::anchorreson # find proton connected to primary geteroatom { my ($sp,$name,$ass) = @_; my $dim = internal::spectrum::axis::getdimnum($sp,$name,'X'); return $ass->[$dim]; } sub internal::noe::leadreson # find proton 'not connected' to heteroatom # or the one connected to secondary heteroatom { my ($sp,$name,$ass) = @_; my $dim = internal::spectrum::axis::getdimnum($sp,$name,'Z'); return $ass->[$dim]; } package bio::atom; sub bio::atom::new { my ($res,$atom,$pos) = @_; my ($aa,$resnum) = bio::protein::sequence::idres($res); my $atom = { atom=>$atom, residue=>$res, residue_name=>$aa, resno=>$resnum, seq_pos=>$resnum, 'pos'=>$pos }; return $atom; } package internal::resonance; sub internal::resonance::organize #organize raw resonance list by aminoacid #and by atom group (HB, HG HG2 and those defined by ambig_regex in nomenclature #return ref to hash keyed by residue like D34 #hash values are hash tables keyed by group, and valued by arrays of atoms in the group { my $res = shift; my %res; #group atoms by residues first foreach my $atom (@$res) { my $residue = $atom->{residue}; push @{$res{$residue}}, $atom; } my %nom = bio::protein::aminoacid::atom::nomenclature(); foreach my $residue (keys %res) { my $group = $res{$residue}; my %groups; foreach my $atom (@$group) { my $residue = $atom->{residue}; my $name = $atom->{atom}; # figure out atoms group name my ($aa,$no) = bio::protein::sequence::idres($residue); my $regex = xplor::wk2regex($name);#convert atom wildcard to regex my $alist = $nom{$aa}; my @members; my %class; my $group_name = undef; foreach my $a (@$alist) { my $iupac = $a->{iupac}; my $ambig = $a->{ambig_regex}; if (not defined $ambig) { if ($iupac =~ /\d$/) { $ambig = substr($iupac, 0, length($iupac) - 1); } else { $ambig = $iupac; } } if ($iupac =~ /^$regex$/) { $class{$ambig}++; } my @classes = keys %class; if (scalar(keys %class) == 1) { $group_name = $classes[0]; } } # if defined stash this atom into %groups structure if (defined $group_name) { push @{$groups{$group_name}}, $atom; } } # replace flat array with a hash organized by atom group name $res{$residue} = \%groups; } return \%res; } sub internal::resonance::findfreq # find frequency of matching resonance # report first matched resonance and dont check # if there are more of them that match { my ($res,$ass) = @_; foreach my $as (@$ass) { my @at1 = split /\|/, $res->{atom}; my @at2 = split /\|/, $as->{atom}; if (util::array::somein(\@at1,\@at2) and $res->{residue} eq $as->{residue}) { return $as->{'shift'}; } } } sub internal::resonance::fold # fold ass table to spectral window { my ($ass,$ref,$sw,$atoms) = @_; my $half = 0.5 * $sw; $atoms = '.*' if not defined $atoms; foreach my $res (@$ass) { if ($res->{atom} =~/^$atoms/) { my $shift = $res->{'shift'}; my $diff = $shift - $ref; if ($diff > $half) { my $n = 1 + int (abs($diff)/$sw - 0.5); $shift -= $n * $sw; } elsif ($diff < -1*$half) { my $n = 1 + int (abs($diff)/$sw - 0.5); $shift += $n * $sw; } $res->{'shift'} = $shift; } } } sub internal::resonance::findclose { my ($coor,$ass,$tol) = @_; my @close = (); # todo implement smarter search here # which would assume that resonance table is sorted foreach my $a (@$ass) { push @close, $a if abs($coor - $a->{'shift'}) < $tol; } return \@close; } sub internal::resonance::calculate { my $peaks = shift; my %res; foreach my $peak (@$peaks) { next if not $peak->{ass}; my $pos = $peak->{'pos'}; my $ass = $peak->{ass}; die 'oops...' if scalar(@$ass) != scalar(@$pos); my $num = scalar @$ass; for (my $i=0; $i<$num; $i++) { my $a = $ass->[$i]; my $p = $pos->[$i]; my $label = toolkit::getlabel($a); $res{$label}{ass} = $a; push @{$res{$label}{'pos'}}, $p; } } foreach my $label (keys %res) { my $pos = $res{$label}{'pos'}; my $val; foreach (@$pos) { $val += $_; #todo calculate stdev here } $val /= scalar(@$pos); $res{$label}{'center'} = $val; } return \%res; } sub internal::resonance::seqfilt # open protein sequence file # and throw away resonances that dont belong # to the peptide (ideally...) { my ($ass, $seq) = @_; my @seq = bio::protein::sequence::read($seq); # throw away resonances not in compliance with the my $lim = $seq[0]; my @aa; for (my $i = 1; $i <= $lim; $i++) { push @aa, $seq[$i] . $i; } # peptide sequence my @clean; foreach my $reson (@$ass) { if (util::array::isin($reson->{residue}, \@aa)) { push @clean, $reson; } } return \@clean; } package internal::spectrum; # deals with spectral data in internal format # spectrum 'object' is a hash reference; # with the following elements: # 'type'=>(single|multiple) # 'spectra'=>{name=>[peaks]} sub internal::spectrum::peak::findclose { my ($sp,$peak) = @_; my $spectrum = internal::peak::whatspectrum($peak); my $PK = internal::spectrum::getpeaks($sp,$spectrum); my $tol = internal::spectrum::axis::gettol($sp,$spectrum); my $dim = internal::spectrum::dim($sp,$spectrum); my @axtbl; for (my $i=0; $i<$dim;$i++) { $axtbl[0][$i] = $i; $axtbl[1][$i] = $i; } my @TOL; for (my $i=0; $i<$dim;$i++) { push @TOL, $tol->{$i}; } my @close; foreach my $P (@$PK) { if (math::point::isnear($peak,$P,\@TOL,\@axtbl)) { push @close, $P; } } if (scalar(@close)==0) { return undef; } return \@close; } sub internal::spectrum::initass { my $sp = shift; my $ass = internal::spectrum::getdata($sp,'ASS'); my @names = internal::spectrum::names($sp); foreach my $name (@names) { my $ax = internal::spectrum::getaxispar($sp,$name); my %d = internal::spectrum::defaults::axis(); my $atoms = $d{atomtypes}{$name}; my $axdef = $d{axdef}{$name}; die "internal axis definition not found for spectrum '$name'" if not defined $axdef; # here check whether this operation is required at all my %lab2xyz = reverse %$axdef; foreach $a (keys %$ax) { my $lab = $ax->{$a}{'label'}; my $xyz = $lab2xyz{$lab}; my $regex = $atoms->{$xyz}; #util::print($name,$lab,$xyz,$regex); my @axass; # filter ass table against atom name regexes foreach my $as (@$ass) { if ($as->{atom} =~ /$regex/) { push @axass, $as; } } # and fold ass table into the spectral width of # this particular axis my $ref = $ax->{$a}{'ref'}; my $sw = $ax->{$a}{'sw'}; internal::resonance::fold(\@axass,$ref,$sw); # store the folded ass table ass additional axis parameter $ax->{$a}{'ASS'} = \@axass; } } } sub internal::spectrum::init { my $in = shift; my $sp; if (-f $in) { $sp = internal::spectrum::read($in); } else { $sp = internal::spectrum::new(); } return $sp; } sub internal::spectrum::printinfo { my $SP = shift; my @spectra = internal::spectrum::names($SP); my $NUM; foreach my $sp (@spectra) { my $num = scalar(@{$SP->{spectra}{$sp}}); printf "%-12s%-5d peaks\n", $sp, $num; $NUM += $num; } printf "%-12s%-5d peaks\n", 'total', $NUM; } sub internal::spectrum::new { my $SP = {type=>'single'}; return $SP; } # todo rename it to # internal::spectrum::userdata::set sub internal::spectrum::setdata { my ($sp,$dataname,$data) = @_; $sp->{userdata}{$dataname} = $data; return; } sub internal::spectrum::getpeak { my ($sp,$spectrum,$peakno) = @_; my $peaks = internal::spectrum::getpeaks($sp,$spectrum); foreach my $peak (@$peaks) { return $peak if $peak->{id} == $peakno; } return undef; } # todo rename it to # internal::spectrum::userdata::get sub internal::spectrum::getdata { my ($sp, $dataname) = @_; my $data=$sp->{userdata}{$dataname}; if (not defined $data) { app::error("record $dataname not found in dataset"); } return $data; } sub internal::spectrum::userdata::names { my $sp = shift; my @keys = keys %{$sp->{userdata}}; return \@keys; } sub internal::spectrum::axis::getdimnum { my ($sp,$name,$dim) = @_; die "dim must be one of XYZA" if $dim !~ /^[AXYZ]$/; my $num = internal::spectrum::axis::getlabels($sp,$name); my %d = internal::spectrum::defaults::axis(); my $axdef = $d{axdef}{$name}; if (not defined $axdef) { die "axis definition not given for spectrum '$name'"; } return $num->{$axdef->{$dim}}; } sub internal::spectrum::setpkname { my ($sp,$name) = @_; my $peaks = internal::spectrum::getpeaks($sp,$name); foreach my $peak (@$peaks) { $peak->{spectrum} = $name; } } sub internal::spectrum::rename { my ($sp,$from,$to) = @_; my $peaks = $sp->{spectra}{$from}; $sp->{spectra}{$to} = $peaks; $sp->{spectra}{$from} = undef; delete $sp->{spectra}{$from}; my $peaks = internal::spectrum::getpeaks($sp,$to); foreach my $peak (@$peaks) { $peak->{spectrum} = $to; } my $pars = $sp->{parameters}{$from}; $sp->{parameters}{$to} = $pars; $sp->{parameters}{$from} = undef; delete $sp->{parameters}{$from}; } sub internal::spectrum::write { my ($sp,$file,$par) = @_; util::savedata($sp,$file,$par); } sub internal::spectrum::setaxis { my ($sp,$name,$ax) = @_; my $axpar = internal::spectrum::getaxispar($sp,$name); %$axpar = %$ax; return; } sub internal::spectrum::setaxispar { my ($sp,$arg) = @_; my $name = $arg->{-spectrum}; my $axis = $arg->{-axis}; my $tol = $arg->{-tol}; my $fold = $arg->{-fold}; my $unfold = $arg->{-unfold}; my $ref = $arg->{'-ref'}; my $label = $arg->{-label}; my $sw = $arg->{-sw}; my $lim = $arg->{-lim}; my %d = internal::spectrum::defaults::axis(); $axis =~ tr/xyz/XYZ/; if ($axis =~ /^[XYZA]$/) { my $tr = $d{xyz2num}; my $num = $tr->{$axis}; my $axpar = internal::spectrum::getaxispar($sp,$name); util::scalar::setif(\$axpar->{$num}{tol},$tol); util::scalar::setif(\$axpar->{$num}{label},$label); util::scalar::setif(\$axpar->{$num}{sw},$sw); util::scalar::setif(\$axpar->{$num}{hippm}, util::array::max($lim)); util::scalar::setif(\$axpar->{$num}{loppm}, util::array::min($lim)); if ($fold eq 'true') { $axpar->{$num}{fold} = 'true'; } elsif ($unfold eq 'true') { $axpar->{$num}{fold} = 'false'; } util::scalar::setif(\$axpar->{$num}{'ref'},$ref); } else { app::error("value $axis not understood, only ". util::word::join(keys %{$d{xyz2num}}) . ' are allowed'); } } sub internal::spectrum::getaxispar { my ($sp,$name) = @_; if (not defined $sp->{parameters}{$name}{axes}) { $sp->{parameters}{$name}{axes} = {}; } return $sp->{parameters}{$name}{axes}; } sub internal::spectrum::defaults::axis { my %d; $d{num2xyz} = {0=>'X',1=>'Y',2=>'Z',3=>'A'}; $d{noebase} = {nnoe=>['HN','H'],cnoe=>['H1','H2']}; # NOTE here well: # convention here is that X and Y are covalently # connected atoms in noe spectra # maybe it should also hardcoded somewhere in %d hash # also make X correspond to anchor atom $d{axdef} = { nnoe=>{ X=>'HN',Y=>'N',Z=>'H' }, cnoe=> { X=>'H1',Y=>'C',Z=>'H2' } }; # atom types are regular expressions that will match for atoms # labeled in iupac nomenclature # relevant to a given axis in the noesy spectrum $d{atomtypes} = { nnoe=>{ X=>'^H$',Y=>'^N',Z=>'^H' }, cnoe=>{ X=>'^H.+',Y=>'^C.+',Z=>'^H' } }; $d{noeconn} = {nnoe=>['HN','N'],cnoe=>['H1','C']}; $d{xyz2num} = { reverse(%{$d{num2xyz}}) }; return %d; } sub internal::spectrum::printaxes { my ($sp,$name) = @_; my @center = internal::spectrum::center($sp,$name); my $dim = internal::spectrum::dim($sp,$name); my %d = internal::spectrum::defaults::axis(); my $lab = $d{num2xyz}; my $axes = internal::spectrum::getaxispar($sp,$name); for (my $i = 0;$i<$dim;$i++) { print "axis $lab->{$i}:\n"; printf "\taverage shift %6.2f\n", $center[$i]; if (defined $axes->{$i}) { my $sw = $axes->{$i}{sw}; my $tol = $axes->{$i}{tol}; my $label = $axes->{$i}{label}; my $ref = $axes->{$i}{'ref'}; my $fold = $axes->{$i}{fold}; my $hi = $axes->{$i}{hippm}; my $lo = $axes->{$i}{loppm}; printif('label',$label); printif('spectral width',$sw); printif('origin offset',$ref); printif('tolerance',$tol); if ($fold eq 'true') { printf "\t%-15s%s\n",'folded','YES'; } elsif ($fold eq 'false') { printf "\t%-15s%s\n",'folded','NO'; } printif('low ppm',$lo); printif('high ppm',$hi); } } sub printif { my ($a,$b) = @_; if (defined $a and defined $b) { printf "\t%-15s%s\n",$a,$b; } } } sub internal::spectrum::printpar { my ($sp,$name) = @_; my $par = internal::spectrum::getpar($sp,$name); if (defined $par) { print "parameters:\n"; foreach my $p (keys %$par) { print "$p\t$par->{$p}\n"; } } else { print "spectrum $name has no associated parameters\n"; } } sub internal::spectrum::names { my $SP = shift; return keys %{$SP->{spectra}}; } sub internal::spectrum::parseaxes { my %arg = @_; my $tol = $arg{-tol}; my $all = $arg{-collection}; my $name = $arg{-name}; my %tol; my %limits = (H=>[-5,10],C=>[20,80],N=>[100,130],CO=>[140,200]); $tol = util::array::insure($tol); my $type = internal::spectrum::axis::deftype($tol); if ($type eq 'explicit') { foreach my $bit (@$tol) { $bit =~ /^ax([1-9])=(.*)$/; my $axis = $1; my $num = $2; $tol{$axis} = $num; } } elsif ($type eq 'atom_based') { if (not defined $name) { die "internal: spectrum name must be given"; } my @center = internal::spectrum::center($all,$name); my %t; foreach my $bit (@$tol) { $bit =~ /^([HN]|CO|C)=(.*)$/; $t{$1} = $2; } for (my $i=0;$i{$a}{label}; } my %out = reverse %lbl; if (scalar keys %out != scalar keys %lbl) { die "same axis repeated in spectrum $name"; } return \%out; } sub internal::spectrum::axis::gettol { my ($sp, $name) = @_; my $ax = internal::spectrum::getaxispar($sp,$name); my @ax = keys %$ax; my %tol; foreach my $a (@ax) { $tol{$a} = $ax->{$a}{tol}; } return \%tol; } sub internal::spectrum::axis::deftype { my $tol = shift; if (util::array::allmatch($tol,'^ax[1-9]=')) { return 'explicit'; } elsif (util::array::allmatch($tol,'^([HCN]|CO)=\d+(\.?\d+)?$')) { return 'atom_based'; } else { return undef; } } sub internal::spectrum::filter::ass { my ($sp, $ass) = @_; my $ass = sparky::readass($ass,'strict'); my @names = internal::spectrum::names($sp); # make sure that all noe spectra in file were # symmetry filtered before my @spectra = util::array::thosein(\@names, ['nnoe','cnoe']); # try to assign every peak internal::spectrum::apply($sp,\@spectra,$internal::peak::{noeass},$sp,$ass); my $mark = 0; # mark all peaks with unique number internal::spectrum::apply( $sp,\@spectra, sub{$_[0]->{mark} = ${$_[1]}++}, \$mark ); # collect unique symmetry couples my %symm; internal::spectrum::apply( $sp, \@spectra, sub { my ($peak,$list) = @_; my $symm = $peak->{symm}; return if not defined $symm or scalar @$symm == 0; my $from = $peak->{mark}; foreach my $other (@$symm) { my $to = $other->{mark}; my $label = join('-',sort($from,$to)); $list->{$label} = [$peak,$other]; } }, \%symm); # remove symmetry pairs that could not be assigned to anything foreach my $key (keys %symm) { my ($p1,$p2) = @{$symm{$key}}; my $n1 = scalar (@{$p1->{noeass}}); my $n2 = scalar (@{$p2->{noeass}}); if ($n1==0 or $n2 == 0) { $symm{$key} = undef; delete $symm{$key}; next; } #my $t1 = internal::peak::text($p1); #my $t2 = internal::peak::text($p2); #print "$n1\@$t1\t$n2\@$t2\n"; #map {printass($_)} @{$p1->{noeass}}; #print "---\n"; #map {printass($_)} @{$p2->{noeass}}; } my %SYMM; # final list of symmetry peaks foreach my $key (keys %symm) { my ($peak1,$peak2) = @{$symm{$key}}; my $noe1 = $peak1->{noeass}; my $noe2 = $peak2->{noeass}; foreach my $as1 (@$noe1) { foreach my $as2 (@$noe2) { # debugging code that prints assignments # use it if you want to check correctnss of # symmetry assignment detection #map {print $_->{atom} , '@' , $_->{residue}, ' '} @$as1; #print " X "; #map {print $_->{atom} , '@' , $_->{residue}, ' '} @$as2; if (internal::peak::ass::issymm( ass1=>{peak=>$peak1,ass=>$as1}, ass2=>{peak=>$peak2,ass=>$as2}, spectrum=>$sp) ) { my @pair; $pair[0] = {peak=>$peak1,ass=>$as1}; $pair[1] = {peak=>$peak2,ass=>$as2}; push @{$SYMM{$key}}, \@pair; #print "=====>symm!!!\n"; } else { #print "=====>not symm!!!\n"; } } } } sub printass { my $ass = shift; foreach my $at (@$ass) { my $shift = $at->{'shift'}; my $atom = $at->{atom}; my $residue = $at->{residue}; print join('*',($shift,$atom,$residue)), "\t"; } print "\n"; } my $numpeaks = scalar keys %SYMM; my $ambig; my $avedev = 0; my $mindev = 1000000; my $maxdev = 0; foreach my $key (keys %SYMM) { $ambig += scalar(@{$SYMM{$key}}); foreach my $symm (@{$SYMM{$key}}) { my $peak1 = $symm->[0]{peak}; my $peak2 = $symm->[1]{peak}; my $dist = internal::peak::symmdist($peak1,$peak2,$sp); my $d = util::array::sum($dist)/2; my $avedev += $d; $maxdev = $d if $d > $maxdev; $mindev = $d if $d < $mindev; } } print "\n"; print util::line::wrap(-line=>"Found $numpeaks symmetry related peak(s) ". "consistent with existing assignments"); print "\n"; if ($numpeaks > 0) { $ambig /= $numpeaks; $avedev /= $numpeaks; my $line = sprintf "Average number of alternative assignments per ". "symmetry pair is %4.2f",$ambig; print util::line::wrap(-line=>$line),"\n"; $line = sprintf "Average symmetry deviation %4.2f ppm",$avedev; print util::line::wrap(-line=>$line),"\n"; $line = sprintf "Minimum symmetry deviation %4.2f ppm",$mindev; print util::line::wrap(-line=>$line),"\n"; $line = sprintf "Maximum symmetry deviation %4.2f ppm",$maxdev; print util::line::wrap(-line=>$line),"\n"; print "\n"; } return \%SYMM; } sub internal::spectrum::apply # apply code with arguments to all peaks in a list of spectra { my ($sp, $range, $code, @args) = @_; foreach my $spectrum (@$range) { my $peaks = internal::spectrum::getpeaks($sp,$spectrum); foreach my $peak (@$peaks) { $code->($peak,@args); } } } sub internal::spectrum::filter::isolated # special filter to find peaks resolved within tolerance { my ($SP,$tol,$method) = @_; my $ax = internal::spectrum::axis::deftype($tol); if ($SP->{type} eq 'multiple' and $ax eq 'explicit') { app::error("only one spectrum at a time allowed if axes are ". 'specified explicitly, use atom-based tolerance specs '. 'like H=0.1 C=0.15 N=0.5, etc.'); } my @spectra = internal::spectrum::names($SP); foreach my $name (@spectra) { my %tol = internal::spectrum::parseaxes(-collection=>$SP, -name=>$name, -tol=>$tol); print "spectrum: $name\n"; my @peaks = internal::spectrum::peaks($SP,$name); # now find resolved peaks # 1. Sort peaks by all relevant coordinates my %sortedrefs; foreach my $dim (keys %tol) { my @sorted; foreach my $peak (@peaks) { push @sorted, $peak; } @sorted = sort {$a->{pos}[$dim]<=>$b->{pos}[$dim]} @sorted; my $lim = scalar(@sorted); for (my $i = 0; $i < $lim; $i++) { my $peak = $sorted[$i]; if (not defined $peak->{'index'}) { $peak->{'index'} = {}; } $peak->{'index'}{$dim} = $i; } $sortedrefs{$dim} = \@sorted; } # 2. find resolved peaks my @resolved; my $lim = scalar(@peaks); if ($method eq 'showdup')# mark each peak with individual number { for (my $i=0; $i<$lim; $i++) { $peaks[$i]->{order} = $i; } } my $mark = 0; my @contacts; PEAK: foreach my $peak (@peaks) { my $index = $peak->{'index'}; my @close; foreach my $dim (keys %tol) { # go through all peaks and eliminate those that # are far away by a certain dimension for (my $i = $index->{$dim}; $i < $lim; $i++) { my $pk2 = $peaks[$i]; next if $pk2->{index} == $index; $mark++; my $delta = $pk2->{pos}[$dim] - $peak->{pos}[$dim]; last if (abs($delta) > $tol{$dim}); push @close, $pk2; } for (my $i = $index->{$dim}; $i > 0; $i--) { my $pk2 = $peaks[$i]; next if $pk2->{index} == $index; $mark++; my $delta = $pk2->{pos}[$dim] - $peak->{pos}[$dim]; last if (abs($delta) > $tol{$dim}); push @close, $pk2; } } if ($method eq 'isolated') { foreach my $pk2 (@close) # here we are doing far more work then needed # if there are many resolved peaks in the @close array { $mark++; next PEAK if (internal::peak::near($pk2,$peak,\%tol)); } push @resolved, $peak; } elsif ($method eq 'showdup') { my @correct; foreach my $pk2 (@close) { if (internal::peak::near($pk2,$peak,\%tol)) { push @correct, $pk2; } } push @contacts, [$peak,\@correct]; } else{die}; } if ($method eq 'isolated') { print scalar(@resolved), " resolved peaks found\n"; #my $name = internal::spectrum::name($SP); $SP->{spectra}{$name} = \@resolved; #put resolved peaks back #print "i misfired $mark times\n"; } elsif ($method eq 'showdup') { #make sure that contacts appear only once my %contacts; foreach my $contact (@contacts) { my $peak = $contact->[0]; my $tok1 = $peak->{order}; my $others = $contact->[1]; foreach my $partner (@$others) { my $tok2 = $partner->{order}; my $label = join('-',sort($tok1,$tok2)); $contacts{$label} = [$peak,$partner]; } } #print results print scalar(keys %contacts), ' close contacts found',"\n"; foreach my $cont (keys %contacts) { my ($a,$b) = @{$contacts{$cont}}; if ($a->{spectrum} ne $b->{spectrum}) { die "both peaks must be from same spec"; } my $spec = $a->{spectrum}; $a = internal::peak::text($a); $b = internal::peak::text($b); $a = util::line::peel($a); $b = util::line::peel($b); print "$spec ($a) and ($b)\n"; } }else{die}; } exit 0 if ($method eq 'showdup'); } sub internal::spectrum::exists { my ($SP,$name) = @_; return 1 if defined $SP->{spectra}{$name}; return 0; } sub internal::spectrum::setpar { my ($SP,$name,$par,$val) = @_; $SP->{parameters}{$name}{general}{$par} = $val; } sub internal::spectrum::getpar { my ($SP,$name,$par) = @_; if (defined $par) { return $SP->{parameters}{$name}{general}{$par}; } else { return $SP->{parameters}{$name}{general}; } } sub internal::spectrum::rmpar { my ($SP,$name,$par) = @_; delete $SP->{parameters}{$name}{general}{$par}; return; } sub internal::spectrum::filter { my ($SP, $method,$argv) = @_; if ($method eq 'integrated') { foreach my $spectrum (keys %{$SP->{spectra}}) { my @out; my $in = $SP->{spectra}{$spectrum}; foreach my $peak (@$in) { next if (not defined $peak->{integral}); push @out, $peak; } $SP->{spectra}{$spectrum} = \@out; } } elsif ($method eq 'symm') { my $a = internal::spectrum::exists($SP,'nnoe'); my $b = internal::spectrum::exists($SP,'cnoe'); if (!$a or !$b) { app::error("symmetry peak search requires ". "'nnoe' and 'cnoesy' spectra"); } my $ax = internal::spectrum::getaxispar($SP,'nnoe'); my @required = qw (H HN N); util::assert( $util::array::{allin}, [map {$ax->{$_}{label}} keys %$ax], \@required, "axes ".util::word::join(@required).'are required '. 'for spectrum nnoe' ); util::assert( $util::array::{isnum}, [map {$ax->{$_}{tol}} keys %$ax], 'tolerance definition is required for all axes '. 'for nnoe spectrum' ); $ax = internal::spectrum::getaxispar($SP,'cnoe'); @required = qw(C H1 H2); util::assert( $util::array::{allin}, [map {$ax->{$_}{label}} keys %$ax], \@required, "axes ".util::word::join(@required).'are required '. 'for spectrum cnoe' ); util::assert( $util::array::{isnum}, [map {$ax->{$_}{tol}} keys %$ax], 'tolerance definition is required for all axes '. 'for cnoe spectrum' ); internal::spectrum::filter::symm($SP); } elsif (util::array::isin($method, ['isolated','showdup'])) { my $tol = $argv->{-tol}; internal::spectrum::filter::isolated($SP,$tol,$method); } else { die "internal: $method unimplemented in internal::spectrum::filter"; } } sub internal::spectrum::filter::symm # resulting assignments will be 'dummy' i.e.' there will be no # connections to atoms, so there will be 'ghost' peaks # they will need to be filtered out with using resonance assignments { my $sp = shift; my @spectra = internal::spectrum::names($sp); my @spectra = util::array::thosein(\@spectra,['nnoe','cnoe']); # unfold axes # for each spectrum remember groups of peaks generated by unfolding my %groups; foreach my $spectrum (@spectra) { my $peaks = internal::spectrum::getpeaks($sp,$spectrum); my $ax = internal::spectrum::getaxispar($sp,$spectrum); # populate array with refs to peaks due to unfolding of each peak my @groups; foreach my $peak (@$peaks) { # put ref to peakarray generated by unfolding of $peak push @groups, internal::peak::unfold($peak,$ax); } # repack peaks from groups into a single list for unfolded spectrum # and mark all peaks with a number of a group they belong to my @list; my $i = 0; foreach my $group (@groups) { push @list, @$group; # mark all peaks with a group number foreach my $upeak (@$group) { $upeak->{foldgroup} = $i; } $i++; } my $numgroups = scalar (@groups); if ($numgroups > 0) { my $numpeaks = 0; foreach my $group (@groups) { $numpeaks += scalar (@$group); } print util::line::wrap(-line=>"Unfolded axes for spectrum $spectrum"), "\n"; print util::line::wrap(-line=>"Was $numgroups peaks before unfolding"),"\n"; print util::line::wrap(-line=>"Obtainded $numpeaks peaks after unfolding"), "\n"; $numpeaks /= $numgroups; my $line = sprintf "On average %4.2f unfoled peaks per one observed peak",$numpeaks; print util::line::wrap(-line=>$line),"\n"; } # replace spectrum with its unfolded representation internal::spectrum::setpeaks($sp,$spectrum,\@list); #internal::spectrum::setaxis($unfolded,$spectrum,$ax); # remember groups generated by unfolding of the spectrum $groups{$spectrum} = \@groups; } # calculate master axis correspondence table my %d = internal::spectrum::defaults::axis(); my $axtbl = $d{noebase};# get symbolic axis correspondence table my %axtbl; # and translate into this numeric one foreach my $spectrum (@spectra) { my $ax = internal::spectrum::getaxispar($sp,$spectrum); my $axlist = $axtbl->{$spectrum} or die "axis table for spectrum \'$spectrum\' not defined"; if (scalar(@$axlist) != 2) { die "internal: expected two elements in ". "array, found " . scalar(@$axlist); } # get axis numerical key for each axis label my %num; my @nums = keys %$ax; foreach my $n (@nums) { $num{$ax->{$n}{'label'}} = $n; } # extract numerical labels for axes of interest my @ax = map {$num{$_}} @$axlist; # store those labels in the axis correspondence table $axtbl{$spectrum} = \@ax; } # find symmetry peak for each peak from each spectrum but dont repeat for (my $i=0; $i$i is a non-repeating condition i.e. dont look back for (my $j=$i; $j[1][0]; my $p = internal::spectrum::getpeaks($sp,$S2); my @P2 = sort {$a->{'pos'}[$D]<=>$b->{'pos'}[$D]} @$p; # figure out final tolerance as average of corresp axes my @TOL; for (my $d=0;$d<2;$d++) { my $d1 = $TBL->[0][$d]; my $d2 = $TBL->[1][$d]; push @TOL, 0.5*($tol1->{$d1} + $tol2->{$d2}); } # find symmetry peaks from S2 for each peak from S1 my $k=0; foreach my $peak (@$P1) { #printf "%d\n",$k++; next if internal::peak::isdiag($peak,$tol1,$TBL->[0]); my $symm = internal::peak::findsymm($peak,\@P2, \@TOL,$TBL); if (not defined $peak->{symm}) { $peak->{symm} = []; } push @{$peak->{symm}}, @$symm; # remember them } } } # here select peaks that have have symm partner # oops i think i deleted too many peaks here # so comment it out # foreach my $S (@spectra) # { # my $P = internal::spectrum::getpeaks($sp,$S); # my @symm; # foreach my $p (@$P) # { # my $symm = $p->{symm}; # push @symm, $p if defined $symm and scalar @$symm > 0; # } # internal::spectrum::setpeaks($sp,$S,\@symm); # } } sub internal::spectrum::dim { my ($spec,$set) = @_; $set = internal::spectrum::name($spec,$set); my $peak = $spec->{spectra}{$set}[0]; return internal::peak::dim($peak); } sub internal::spectrum::isa { die "internal: function internal::spectrum::isa not working"; } sub internal::spectrum::name { my ($spectra,$set) = @_; if ($spectra->{type} eq 'multiple') { if (not defined $set) { die "internal: internal::spectrum::name error"; } } elsif (scalar(keys %{$spectra->{spectra}}) > 1) { die "internal: spec data internally inconsistent"; } else { my @sp = keys %{$spectra->{spectra}}; $set = $sp[0]; } return $set; } sub internal::spectrum::peak::findSymmByAss { my ($sp,$spectra,$peak,$ass) = @_; my @allspec = internal::spectrum::names($sp); util::assert(sub{util::array::allin($spectra,\@allspec)}, "one of " . join(',',@$spectra) . " spectra not found ". "in the dataset"); # determine symmetric symbolic assignment my $copy = internal::peak::copy($peak); internal::peak::setass($copy,$ass); my $symm = internal::peak::findSymmPeakLoc($copy,$sp); my $lab = internal::spectrum::axis::getlabels($sp,$symm->{spectrum}); # todo important here this is actually interesing # maybe you still can search for symmetry peaks # even though not all frequencies are known # and use result of that search to find unknown frequencies my $pos = internal::peak::getcoor($symm); if (not defined $pos) { app::message('search for symmetry peaks aborted'); return undef; } internal::peak::setcoor($symm,$pos); # search for real peaks close to this artificial one my $symmpeaks = internal::spectrum::peak::findclose($sp,$symm); return $symmpeaks; } sub internal::spectrum::peaks #give peaks from one spectrum { my ($spectra,$set) = @_; $set = internal::spectrum::name($spectra,$set); my $peaks = $spectra->{spectra}{$set}; if (not defined $peaks) { app::error("spectrum $set is inknown, check your data"); } return @$peaks; } sub internal::spectrum::setpeaks { my ($sp,$name,$peaks) = @_; $sp->{spectra}{$name} = $peaks; if (scalar(keys %{$sp->{spectra}}) > 1) { $sp->{type} = 'multiple'; } return; } sub internal::spectrum::indexpeaks { my ($sp,$name) = @_; my $dim = internal::spectrum::dim($sp,$name); my %index; for (my $i=0; $i<$dim; $i++) { my $peaks = internal::spectrum::getpeaks($sp,$name); my @sorted = sort {$a->{pos}[$dim]<=>$b->{pos}[$dim]} @$peaks; my $lim = scalar(@sorted); for (my $i = 0; $i < $lim; $i++) { my $peak = $sorted[$i]; if (not defined $peak->{'index'}) { $peak->{'index'} = {}; } $peak->{'index'}{$dim} = $i; } $index{$dim} = \@sorted; } return \%index; } sub internal::spectrum::getpeaks { my ($sp, $name) = @_; return $sp->{spectra}{$name}; } sub internal::spectrum::read # read one set of spectra from disk { my $file = shift; app::message("reading data from $file ..."); return util::readdata($file); app::message("done."); } sub internal::spectrum::merge { die 'unimplemented'; my @files; my $S; foreach my $file (@files) { #todo validate that each file is #actually contains spectral data my $s = util::readdata($file); # merge mutiple spectral data my $sp = $s->{spectra}; my $SP = $S->{spectra}; foreach my $spectrum (keys %$sp) { my $peaks = $sp->{$spectrum}; if (defined $SP->{$spectrum}) { push @{$SP->{$spectrum}}, @$peaks; } else { $SP->{$spectrum} = $peaks; } } } } sub internal::spectrum::center # return hash whose keys are numeric axis labels and # values are positions of the spectrum's 'center of mass' # with all peaks treated as having equal weights # if parameter $axes is not given, report center along all # dimensions, otherwise use $axes as a array ref with # the list of dimensions { my ($sp,$name) = @_; my @peaks = internal::spectrum::peaks($sp,$name); my $dim = internal::spectrum::dim($sp,$name); my ($n,@center); $n = 0; foreach my $peak (@peaks) { for (my $i=0; $i<$dim; $i++) { $center[$i] += $peak->{pos}[$i]; } $n++; } if ($n == 0) { return undef; } foreach (@center) { $_ /= $n; } return @center; } sub internal::spectrum::bounds { my ($sp, $name) = @_; my $peaks = internal::spectrum::getpeaks(@_); my $dim = internal::spectrum::dim(@_); my @lo = @{internal::peak::getcoor($peaks->[0])}; my @hi = @lo; for (my $i=0;$i<$dim;$i++) { foreach my $peak (@$peaks) { my $coor = internal::peak::getcoor($peak, $i); $lo[$i] = $coor if $coor < $lo[$i]; $hi[$i] = $coor if $coor > $hi[$i]; } } return [\@lo,\@hi]; } package bruker; sub bruker::findpar { my $name = shift; $name =~ tr/[a-z]/[A-Z]/; #special parameters that must be looked up in files different from #acqus (i.e. in acqu2s etc. must be treated separately but for now #there is no code for that. only file acqus will be read #report all matching the request: if no number asked, give whole array #if number is asked give only one element my $filename = 'acqus'; my @lines = util::readfile($filename); my %results; if ($name =~ /\d/) { #1. try to find numbered parameter directly #report error if there is more then one my @file = @lines; if (not defined $results{$name}) { $results{$name} = []; } while (@file) { if (util::array::ff(\@file,'^##\$'.$name.'\=')) { my $line = shift @file; if ($line =~ /$0..(\d+)$/) { app::error("arrayed value found for numbered parameter $name, line: $line"); } else { $line =~ /=(.*)$/; my $value = util::line::peel($1); push @{$results{$name}}, {'type'=>'scalar','value'=>$value}; } } } #2. get prefix for array parameter $name =~ /^([^\d]+)(\d+)$/; my $base = $1; my $index = $2; if (not defined $base or not defined $index) { app::error("illegal parameter name $name, must be letters followed by digits"); } @file = @lines; while (@file) { if (util::array::ff(\@file,'^##\$'.$base.'\=')) { if ($file[0] =~ /$0..(\d+)$/) { my $array = bruker::grabarray(\@file,$filename); my $size = scalar(@$array); if ($index > $size) { my $last = $size-1; my $value = "index $index is out of range in arrayed parameter $base (0..$last)"; push @{$results{$name}},{'type'=>'scalar','value'=>$value}; } else { push @{$results{$name}},{'type'=>'scalar','value'=>$array->[$index]}; } } else #this would be an unusual result { my $line = shift @file; $line =~ /=(.*)$/; my $value = util::line::peel($1); $value =~ tr/<>/'/; $value = "found scalar parameter $base with value $value"; push @{$results{$name}}, {'type'=>'scalar','value'=>$value}; } } } } else { #collect all parameters that have given prefix (entirely up to beginning of number if there is any) #if there is no array parameter with that prefix, then print is as scalar #if there is array parameter, then print all elements of the array #if there are multiple numbered parameters that match, print result as array #if numbered parameter(s) and array are found, return all my @file = @lines; my %implicit; while (@file) #grab implicit arrays { if (util::array::ff(\@file,'^##\$'.$name.'\d+\=')) { my $line = shift @file; if ($line =~ /$0..(\d+)$(.*$)/) { app::error("unexpected nested array parameter $name found in file $filename"); } else { $line =~ /$name(\d+)=(.*)$/; my $index = $1; my $value = util::line::peel($2); $value =~ tr/<>/'/; if (defined ($implicit{$index})) { app::error("multiple instances of parameter $name found in file $filename"); } $implicit{$index} = $value; } } } push @{$results{$name}}, {'type'=>'hash','value'=>\%implicit}; @file = @lines; while (@file) #grab regular arrays { if (util::array::ff(\@file,'^##\$'.$name.'= $0..\d+$')) { my $array = bruker::grabarray(\@file,$filename); push @{$results{$name}}, {'type'=>'array','value'=>$array}; } } @file = @lines; while (@file) #grab scalars { if (util::array::ff(\@file,'^##\$'.$name.'=')) { my $line = shift @file; $line =~ /= (.*)$/; my $value = util::line::peel($1); $value =~ tr/<>/'/; next if $value =~ /$0..\d+$/; push @{$results{$name}}, {'type'=>'scalar','value'=>$value}; } } } my @names = keys %results; if (scalar(@names) > 1) { app::error("more then one matching parameter found in file $filename: ". join(',',@names)); } else { util::dump(\%results); my $numval = scalar(@{$results{$names[0]}}); if ($numval>1) { app::error("found $numval instance(s) of parameter $name in file $filename"); } } return $results{$names[0]}[0]; } sub bruker::grabarray #it is assumed that array elements start at the beginning of the first array element #entire array must contain strings { my ($file,$filename) = @_; my $line = shift @$file; my ($parname,$dim,$rest); if ($line =~ /^##\$([^=]+)= $0..(\d+)$(.*)$/) { ($parname,$dim,$rest) = ($1,$2+1,$3); if (not defined $rest) { $rest = ''; } unshift @$file,$rest; } else { die "internal: array parameter line expected, $line found"; } my $index = 0; my @array; while ($index < $dim) { my $line = shift @$file; chomp $line; if ($line =~ /^##\$/) { app::error("cannot parse line:\n$line\nin file\n$filename\n". "new parameter found where values for array ". "parameter $parname were expected"); } my @bits = split /\s+/, $line; while (scalar(@bits) > 0 and $index < $dim) { push @array, shift @bits; $index++; } } return \@array; } package internal::assignment; sub internal::assignment::text # todo maybe merge this subroutine with internal::atom::text # and deal with multiple atoms the same way as with one { my $ass = shift; if (ref $ass eq 'ARRAY') { my %res; foreach my $as (@$ass) { my $residue = $as->{residue}; if ($as->{segid} ne '') { $residue = $as->{segid} . '.' . $residue; } push @{$res{$residue}},$as->{atom}; } my @res; foreach my $res (sort keys %res) { my @atoms = @{$res{$res}}; push @res, sprintf("%4s.%s", $res, join('|',sort(@atoms))); } return join(',',@res); } else { return internal::atom::text($ass); } } package internal::atom::text; sub internal::atom::text { my $a = shift; my $aa = $a->{residue}; my $segid = $a->{segid}; my $atom = $a->{atom}; if (defined $segid and $segid ne '') { $atom = $segid . '.' . $atom; } return sprintf "%4s.%-4s", $atom, $aa; } package internal::peak::text; sub internal::peak::findSymmLocByAss { my ($ass,$sp) = @_; my $loc = internal::peak::new::fromAss($ass,$sp); return internal::peak::findSymmPeakLoc($loc,$sp); } sub internal::peak::findSymmPeakLoc { my ($peak,$sp) = @_; my $spectrum = internal::peak::whatspectrum($peak); my $ass = internal::peak::getass($peak); my $leadPROT = internal::noe::leadreson($sp,$spectrum,$ass); my $anchorPROT = internal::noe::anchorreson($sp,$spectrum,$ass); my $leadHET = bio::aminoacid::atoms::getconnected($leadPROT); $spectrum = internal::noe::guessSpecByHet($leadHET); if (not defined $spectrum) { app::message('could not determine what spectrum to use '. 'for the symmetry peak search'); app::message('so the symmetry peak search is aborted'); return undef; } # CAVEAT here I used spectrum name determined from internal # defaults table, but spectrum in the actual dataset comes from # outside, so its name must be the same as the one found by # guessSpecByHet my $symass = internal::peak::ass::new( {X=>$leadPROT,Y=>$leadHET,Z=>$anchorPROT}, $sp, $spectrum); # form a new peak return internal::peak::new::fromAss($symass,$sp,$spectrum); } sub internal::peak::new::fromAss { my ($ass,$sp,$spectrum) = @_; if (not defined $spectrum) { # try to determine spectrum by type of heteroatom # if determination is ambiguous then return undef my @atoms = map {$_->{atom}} @$ass; my @hetero; foreach my $atom (@atoms) { return undef if not defined $atom; push @hetero, $atom if $atom !~ /^H/; } return undef if scalar(@hetero) != 1; my @names = internal::spectrum::names($sp); if ($hetero[0] =~ /^C/) { if (util::array::isin('cnoe',\@names)) { $spectrum = 'cnoe'; } else { return undef; } } elsif ($hetero[0] =~ /^N/) { if (util::array::isin('nnoe',\@names)) { $spectrum = 'nnoe'; } else { return undef; } } } my $peak = internal::peak::new($spectrum); internal::peak::setass($peak,$ass); internal::peak::setpos::byAss($peak,$sp); return $peak; } sub internal::peak::text::ass { my $peak = shift; my $ass = $peak->{ass}; return if not defined $ass; my $text = ''; foreach my $a (@$ass) { $text .= internal::atom::text($a); } return $text; } sub internal::peak::text::pos { my $peak = shift; my $pos = $peak->{pos}; die "pos expected" if not defined $pos; my $text = ''; foreach (@$pos) { $_ = sprintf "%7.2f", $_; } return join('',@$pos); } package math::point; sub math::point::getclose { my ($point,$others,$tol,$tbl) = @_; # extract lists of corresponding dimensions if (not defined $tbl) { die "feature not implemented"; # here i need to # 1) make sure that both points # belong to space of same dimensionality # 2) build table of one to one correspondence # or maybe just find some shortcut to making this # easier # just want to look at all dimensions with # one on one correspondence } my $dim1 = $tbl->[0]; my $dim2 = $tbl->[1]; # define variables used in first pass of search my $HI = scalar(@$others) - 1; my $LO = 0; my $x1 = math::point::getcoor($point,$dim1->[0]); my $TOL = $tol->[0]; my $D1 = $dim1->[0]; my $D2 = $dim2->[0]; # check if search is necessary at all: maybe our peak # is outside bounds of @$others my $hicoor = math::point::getcoor($others->[$HI],$D2); my $locoor = math::point::getcoor($others->[$LO],$D2); return [] if ($x1 - $hicoor > $TOL); return [] if ($locoor - $x1 > $TOL); # do first pass of search using sorted list of peaks by X coord # and store the result in the @$maybe array my $maybe = []; while ( $HI-$LO > 1) { my $n = int (0.5*($LO + $HI)); my $coor = math::point::getcoor($others->[$n],$D2); my $delta = $x1 - $coor; if ($delta > $TOL) { $LO = $n; } elsif ($delta < -1*$TOL) { $HI = $n; } else { # found a peak within bounds at position n # find lower bound my $hi = $n; my $lo = $LO; while($hi - $lo > 1) { my $cur = int (0.5*($hi + $lo)); my $coor = math::point::getcoor($others->[$cur],$D2); my $delta = $x1 - $coor; if ($delta > $TOL) { $lo = $cur; } else { $hi = $cur; } } $LO = $hi; # now LO is the inclusive low bound # init search of high bound $hi = $HI; $lo = $n; while ($hi - $lo > 1) { my $cur = int (0.5*($hi + $lo)); my $coor = math::point::getcoor($others->[$cur],$D2); my $delta = $coor - $x1; if ($delta > $TOL) { $hi = $cur; } else { $lo = $cur; } } $HI = $lo;# HI is the inclusive high bound $maybe = util::array::extract($others, $LO, $HI); last; } } my @symm; foreach my $p (@$maybe) { push @symm, $p if math::point::isnear($point,$p,$tol,$tbl); } return \@symm; } sub math::point::getcoor { my ($point,$dim) = @_; if (defined $dim) { if ($dim < 0 or $dim > math::point::dim($point)) { die "internal: dimension requested for point ". "coordinate out of range"; } return $point->{'pos'}[$dim]; } else { return $point->{'pos'}; } } sub math::point::isnear { my ($p1, $p2, $tol, $tbl) = @_; if (not defined $tbl) { # todo make this code faster by removing # generalization my $dim1 = math::point::dim($p1); my $dim2 = math::point::dim($p2); if ($dim1 != $dim2) { die 'internal: dim1 and dim2 dont agree'; } $tbl = [[],[]]; for (my $i=0;$i<$dim1;$i++) { $tbl->[0][$i] = $i; $tbl->[1][$i] = $i; } } if (not defined $tol) { die 'internal: tolerance must be defined'; } elsif (not ref $tol) { my $TOL = $tol; $tol = []; # convert scalar tolerance to hash my $dim = scalar(@{$tbl->[0]}); for (my $i=0;$i<$dim;$i++) { push @$tol, $TOL; } } my $dim = scalar (@{$tbl->[0]}); my $delta; for (my $i=0; $i<$dim; $i++) { # todo point move this out to math::point::dist my $coor1 = math::point::getcoor($p1,$tbl->[0][$i]); my $coor2 = math::point::getcoor($p2,$tbl->[1][$i]); my $d = ($coor1 - $coor2)/$tol->[$i]; $delta += $d*$d; } return 1 if $delta < 1; return 0; } sub math::point::dim { my $p = shift; return scalar(@{$p->{'pos'}}); } package internal::peak; sub internal::peak::fold { my ($peak,$sp) = @_; #util::dump($peak); die "fundtion internal::peak::fold not implemented"; #sub internal::resonance::fold # fold ass table to spectral window #{ #my ($ass,$ref,$sw,$atoms) = @_; #} } sub internal::peak::assdev { my ($peak, $ass, $sp) = @_; my $spec = internal::peak::whatspectrum($peak); my $coor = internal::peak::getcoor($peak); my $tol = internal::spectrum::axis::gettol($sp,$spec); my @tol; foreach my $axnum (sort {$a<=>$b} keys %$tol) { push @tol, $tol->{$axnum}; } my $dim = internal::peak::dim($peak); my $dist = 0; for (my $i = 0; $i<$dim; $i++) { $dist += (($coor->[$i] - $ass->[$i]{'shift'})/$tol[$i])**2; } return sqrt($dist); } sub internal::peak::symmdist { my ($peak1,$peak2,$sp) = @_; my $x1 = internal::peak::getcoor($peak1,'X',$sp); my $z1 = internal::peak::getcoor($peak1,'Z',$sp); my $x2 = internal::peak::getcoor($peak2,'X',$sp); my $z2 = internal::peak::getcoor($peak2,'Z',$sp); return [abs($x1-$z2),abs($x2-$z1)]; } sub internal::peak::whatspectrum { my ($peak,$sp) = @_; my $spec = $peak->{spectrum}; if (not defined $spec and defined $sp) { die "feature not implemented"; # try to determine where this peak might belong } elsif (defined $spec) { return $spec; } else { return undef; } } sub internal::peak::getvol { my $peak = shift; return $peak->{integral}; } sub internal::peak::noeass { my ($peak,$sp,$ass) = @_; my $spectrum = internal::peak::whatspectrum($peak); my $tol = internal::spectrum::axis::gettol($sp,$peak->{spectrum}); my $dim = internal::peak::dim($peak); # initialize assignment table if it is not yet defined in the # spectrum data # todo fix this plug (used for 'backwards compatibility') my $mode = ''; if (not defined $ass) { $mode = 'use_internal_ass_table'; } if ($mode eq 'use_internal_ass_table') { internal::spectrum::initass($sp); } my @atoms; my $ax = internal::spectrum::getaxispar($sp,$spectrum); for (my $i=0;$i<$dim;$i++) { my $coor = internal::peak::getcoor($peak,$i); # find close atoms for each coordinate based on the assignment if ($mode eq 'use_internal_ass_table') { $ass = $ax->{$i}{'ASS'}; } my $close = internal::resonance::findclose($coor,$ass,$tol->{$i}); #here now #util::print($coor,$close,$ass->[0]{'shift'}); $atoms[$i] = $close; } #util::print(\@atoms); #util::savedata($ax->{2}{'ASS'},'junk'); #util::dump($peak->{'pos'}); my @noeass = multy(@atoms); foreach my $at (@atoms) { if (not defined $at or scalar @$at == 0) { @noeass = (); last; } } # validate each possible assignment # 1. make sure that atom type corresponds to each axis my $ax = internal::spectrum::getaxispar($sp,$spectrum); my %ax; for (my $i=0;$i<$dim;$i++) { $ax{$i} = $ax->{$i}{'label'}; } my @pass1; foreach my $as (@noeass) { for (my $i=0;$i<$dim;$i++) { my $atom = $as->[$i]{atom}; $atom =~ s/^(.).*$/\1/; my $tmp = $ax{$i}; $tmp =~ s/^(.).*$/\1/; next if $atom ne $tmp; } push @pass1, $as; } # 2. make sure that atoms on two axes are from same residue my %d = internal::spectrum::defaults::axis(); my $conn = $d{noeconn}{$spectrum}; my @pass2; my %lab = reverse %ax; my $d1 = $lab{$conn->[0]}; my $d2 = $lab{$conn->[1]}; foreach my $as (@pass1) { my $atom1 = $as->[$d1]; my $atom2 = $as->[$d2]; next if ($atom1->{residue} ne $atom2->{residue}); push @pass2, $as; } # 3. make sure that those atoms are connected based on # aminoacid formula my @pass3; foreach my $as (@pass2) { if (bio::aminoacid::atoms::connected($as->[$d1],$as->[$d2])) { push @pass3, $as; } } $peak->{noeass} = \@pass3; sub multy { my @ar = @_; if (scalar @ar == 1) { my @out; foreach my $el (@{$ar[0]}) { push @out, [$el]; } return @out; } my $level = pop @ar; my @out; foreach my $el (@$level) { my @higher = multy(@ar); foreach my $inhi (@higher) { my @new = @$inhi; push @new, $el; push @out, \@new; } } return @out; } } sub internal::peak::isdiag { my ($peak,$tol,$tbl) = @_; my $dx = $tbl->[0]; my $dy = $tbl->[1]; my $x = math::point::getcoor($peak,$dx); my $y = math::point::getcoor($peak,$dy); my $T = 0.5*($tol->{$dx} + $tol->{$dy}); return 1 if abs($x-$y) < $T; return 0; } sub internal::peak::findsymm { return math::point::getclose(@_); } sub internal::peak::unfold # clone peak according to axis folding parameters { my ($peak,$axes) = @_; my $dim = internal::peak::dim($peak); my @peaks; push @peaks, $peak; for (my $i=0; $i < $dim; $i++) { my $ax = $axes->{$i}; if ($ax->{fold} eq 'true') { my $hi = $ax->{hippm}; my $lo = $ax->{loppm}; my $sw = $ax->{sw}; my $coor = internal::peak::getcoor($peak,$i); for (my $j=1; $coor+$j*$sw < $hi; $j++) { my $new = internal::peak::copy($peak); my $val = $coor+$j*$sw; internal::peak::setcoor($peak,$val,$i); push @peaks, $new; } for (my $j=1; $coor-$j*$sw > $lo; $j++) { my $new = internal::peak::copy($peak); my $val = $coor-$j*$sw; internal::peak::setcoor($peak,$val,$i); push @peaks, $new; } } } return \@peaks; } sub internal::peak::copy { my $peak = shift; my %new = %$peak; foreach my $key (%$peak) { my $type = ref $peak->{$key}; if ($type eq 'ARRAY') { my @ar = @{$peak->{$key}}; $new{$key} = \@ar; } elsif ($type eq 'HASH') { my %h = %{$peak->{$key}}; $new{$key} = \%h; } } return \%new; } sub internal::peak::text { my $peak = shift; my $pos = $peak->{'pos'}; my $spectrum = $peak->{spectrum}; my $id = $peak->{id}; my $ass = internal::peak::text::ass($peak); my $pos = internal::peak::text::pos($peak); my $text; if (defined $ass) { $text .= sprintf "%25s at ", $ass; } if (defined $id) { $id = "peak #$id"; } return sprintf "%s%-25s%s in spectrum %s", $text, $pos, $id, $spectrum; } sub internal::peak::isa # return 1 if $in is a reference to valid peak or array of peaks # return 0 otherwise { my $in = shift; if (ref $in eq 'ARRAY') { if (defined $in->[0]) { $in = $in->[0]; } else { return 0; } } if (defined $in->{pos}) { if (util::array::isa($in->{pos})) { # todo make another check that # all values are numbers return 1; } else { return 0; } } else { return 0; } } sub internal::peak::ass::new { my ($arg,$sp,$name) = @_; my $dim = scalar(keys %$arg); util::assert(sub{util::array::allin([keys %$arg],['X','Y','Z'])}, "internal error: unknown symbol for axis label"); my @ass; foreach my $ax (keys %$arg) { my $num = internal::spectrum::axis::getdimnum($sp,$name,$ax); $ass[$num] = $arg->{$ax}; } return \@ass; } sub internal::peak::getass { my $peak = shift; return $peak->{ass}; } sub internal::peak::setpos::byAss { my ($peak,$sp) = @_; my $ASS = internal::spectrum::getdata($sp,'ASS'); my $spectrum = $peak->{spectrum}; my @pos; my $bad = 0; my $ass = internal::peak::getass($peak); if (not defined $spectrum) { die "internal: spectrum name must be defined for the peak for ". "this function to work"; } my $ax = internal::spectrum::getaxispar($sp,$spectrum); my @A = keys %$ax; foreach my $a (@A) { my $ass = $ax->{$a}{'ASS'}; if (not defined $ass) { die "internal: spectrum $spectrum does not have defined assignment tables"; } } my $dim = scalar(@$ass); for (my $i=0;$i<$dim;$i++) { my $el = $ass->[$i]; my $ASS = $ax->{$i}{'ASS'}; my $res = internal::resonance::findfreq($el,$ASS); if (not defined $res) { $bad = 1; app::message('chemical shift for '. internal::atom::text($el). ' unknown'); } push @pos, $res; } if ($bad == 1) { internal::peak::setpos($peak,undef); } internal::peak::setpos($peak,\@pos); } sub internal::peak::setpos { my ($peak,$pos) = @_; $peak->{'pos'} = $pos; return; } sub internal::peak::ass::issymm { my %arg = @_; my $a1 = $arg{ass1}; my $a2 = $arg{ass2}; my $peak1 = $a1->{peak}; my $peak2 = $a2->{peak}; my $as1 = $a1->{ass}; my $as2 = $a2->{ass}; my $sp = $arg{spectrum}; my $x1 = internal::peak::ass::getdim('X',$peak1,$as1,$sp); my $z1 = internal::peak::ass::getdim('Z',$peak1,$as1,$sp); my $x2 = internal::peak::ass::getdim('X',$peak2,$as2,$sp); my $z2 = internal::peak::ass::getdim('Z',$peak2,$as2,$sp); return 1 if (internal::peak::ass::dim::issame($x1,$z2) and internal::peak::ass::dim::issame($x2,$z1)); return 0; } sub internal::peak::ass::dim::issame { my ($dim1,$dim2) = @_; return 0 if $dim1->{atom} ne $dim2->{atom}; return 0 if $dim1->{residue} ne $dim2->{residue}; return 1; } sub internal::peak::ass::getdim # get component of peak assignment corresponding to # specific named dimension. names should be defined in # spectrum::defaults::axes { # todo probably peaks should have reference to parent # spectrum object my ($axisname,$peak,$ass,$sp) = @_; my %d = internal::spectrum::defaults::axis(); my $b = $d{axdef}; my $spec = $peak->{spectrum}; my $b = $b->{$spec}; if (not defined $b) { die "internal: axdef not found for spectrum ". "$spec axis $axisname"; } my $lbl = internal::spectrum::axis::getlabels($sp,$spec); my $dim = $lbl->{$b->{$axisname}}; return $ass->[$dim]; } sub internal::peak::assert # check a single peak and die it is not # a peak { my $peak = shift; if (not internal::peak::isa($peak)) { die "internal: peak assertion failure"; } } sub internal::peak::getcoor # returns value of a coordinate for a particular dimension # dimensions count 0 and up { my ($peak,$dim,$sp) = @_; internal::peak::assert($peak); if ($dim =~ /^[XYZA]$/) { my $name = $peak->{spectrum}; $dim = internal::spectrum::axis::getdimnum($sp,$name,$dim); return math::point::getcoor($peak,$dim); } else { return math::point::getcoor(@_); } } sub internal::peak::setcoor # returns value of a coordinate for a particular dimension # dimensions count 0 and up { my ($peak,$val,$dim) = @_; internal::peak::assert($peak); if (defined $dim) { if ($dim < 0 or $dim > internal::peak::dim($peak)) { die "internal: dimension requested for peak ". "coordinate out of range"; } $peak->{pos}[$dim] = $val; return; } elsif (ref $val eq 'ARRAY') { $peak->{pos} = $val; } else { die "oops..."; } } sub internal::peak::setass { my ($peak,$ass) = @_; $peak->{ass} = $ass; } sub internal::peak::new { my $spectrum = shift; my $peak = {'pos'=>[]}; if (defined $spectrum) { $peak->{spectrum} = $spectrum; } return $peak; } sub internal::peak::dim { my $in = shift; if (not internal::peak::isa($in)) { die "internal: peak expected"; } return scalar(@{$in->{pos}}); } sub internal::peak::near # tells '1' if two peaks are near each other # (i.e. within tolerance) and '0' otherwise # $peak1 and $peak2 are references to 'peak' hash # $tol is a reference to 'tol' hash that has keys # dimensions in 'computer readable' order 0 and up { my ($peak1, $peak2, $tol) = @_; my $pos1 = $peak1->{pos}; my $pos2 = $peak2->{pos}; my $delta; foreach my $dim (keys %$tol) { app::error("tolerance must be > 0") if $tol->{$dim} == 0; my $rel = abs($pos1->[$dim] - $pos2->[$dim])/$tol->{$dim}; $delta += $rel*$rel; } return 1 if $delta < 1; return 0; } package reports::protein::restraints; sub reports::protein::restraints::new { my ($self,%arg) = @_; my $seq = $arg{sequence}; my $noe = $arg{noe}; my $ass = $arg{ass}; my $hnha = $arg{hnha}; my $ps = new PostScript::Simple(papersize => "letter", colour => 1, eps => 0, units => "mm",landscape=>1); $self = bless {}; $self->{'ps'} = $ps; $self->{'sequence'} = $seq; $self->{'noe'} = $noe; $self->{'ass'} = $ass; $self->{'hnha'} = $hnha; $self->{'line_height'} = 5; $self->{'step_height'} = $self->{'line_height'}*0.2; $self->{'label_width'} = 20; $self->{'cell_width'} = 2.5; $self->{'margin_left'} = 10; $self->{'mm_per_ppm'} = 2; $self->{'top'} = 216 - $self->{'line_height'}; $self->{'cursorY'} = $self->{top}; $self->{'line_width'} = 0.2; $ps->setlinewidth($self->{'line_width'}); $ps->setfont("Arial",9); $ps->newpage(); $self->process_noe(); $self->print_sequence(); $self->move_cursorY(); $self->print_hnha(); $self->move_cursorY(); $self->print_sequentials('aN(i+1)',$self->{AN1}); $self->move_cursorY(); $self->print_sequentials('bN(i+1)',$self->{BN1}); $self->move_cursorY(); $self->print_sequentials('NN(i+1)',$self->{NN1}); $self->print_intermediates(); $self->print_ca_rcoil_diff(); return $self; } sub reports::protein::restraints::stdout{ my $self = shift; print $self->{ps}->get(); } sub reports::protein::restraints::process_noe{ #sequential HA - HN 0 - none, 1 - weak, 2 - strong my $self = shift; my @seq = @{$self->{sequence}}; my $noe = $self->{noe}; my @SEQ_AN = util::array::init($seq[0]+1,999999); my @SEQ_NN = util::array::init($seq[0]+1,999999); my @SEQ_BN = util::array::init($seq[0]+1,999999); my @S3_AN = util::array::init($seq[0]+1,0); my @S3_AB = util::array::init($seq[0]+1,0); my @S2_AN = util::array::init($seq[0]+1,0); my @S2_NN = util::array::init($seq[0]+1,0); my @S4_AN = util::array::init($seq[0]+1,0); $self->{AN1} = \@SEQ_AN; $self->{BN1} = \@SEQ_BN; $self->{NN1} = \@SEQ_NN; $self->{AN2} = \@S2_AN; $self->{NN2} = \@S2_NN; $self->{AN3} = \@S3_AN; $self->{AB3} = \@S3_AB; $self->{AN4} = \@S4_AN; foreach my $r (@$noe){ my $S = $r->{'selections'}; my $upl = $r->{'dist'} + $r->{'dplus'}; foreach my $pair (@$S){ my $g1 = $pair->[0]; my $g2 = $pair->[1]; foreach my $a1 (@$g1){ my $res1 = $a1->[0]; my $name1 = $a1->[1]; foreach my $a2 (@$g2){ my $res2 = $a2->[0]; my $name2 = $a2->[1]; if ($res1 > $res2){ ($res1,$res2) = ($res2,$res1); ($name1,$name2) = ($name2,$name1); } if ($res2-$res1 == 1){ #sequentials if ($seq[$res1] ne 'P' and $seq[$res2] ne 'P'){ if ($name2 eq 'HN'){ if ($name1 eq 'HA'){ util::scalar::set_lower(\$SEQ_AN[$res1],$upl); } elsif ($name1 eq 'HN'){ util::scalar::set_lower(\$SEQ_NN[$res1],$upl); } elsif ($name1 =~ /^HB/){ util::scalar::set_lower(\$SEQ_BN[$res1],$upl); } } } elsif ($seq[$res1] ne 'P' and $seq[$res2] eq 'P'){ if ($name2 =~ /^HD/){ if ($name1 eq 'HA'){ util::scalar::set_lower(\$SEQ_AN[$res1],$upl); } if ($name1 eq 'HN'){ util::scalar::set_lower(\$SEQ_NN[$res1],$upl); } elsif ($name1 =~ /^HB/){ util::scalar::set_lower(\$SEQ_BN[$res1],$upl); } } } elsif ($seq[$res1] eq 'P' and $seq[$res2] ne 'P'){ if ($name2 eq 'HN'){ if ($name1 eq 'HA'){ util::scalar::set_lower(\$SEQ_AN[$res1],$upl); } elsif ($name1 =~ /^HD/){ util::scalar::set_lower(\$SEQ_NN[$res1],$upl); } elsif ($name1 =~ /^HB/){ util::scalar::set_lower(\$SEQ_BN[$res1],$upl); } } } } elsif ($res2 - $res1 == 2){ if ($name1 =~ /^HA/){ if ($name2 eq 'HN' or ($seq[$res2] eq 'P' and $name2 =~ /^HD/)){ $S2_AN[$res1] = 1; } } elsif ($name1 eq 'HN' and $name2 eq 'HN'){ $S2_NN[$res1] = 1; } } elsif ($res2 - $res1 == 3){#aN and ab if ($name1 =~ /^HA/){ if ($name2 eq 'HN' or ($seq[$res2] eq 'P' and $name2 =~ /^HD/)){ $S3_AN[$res1] = 1; } elsif ($name2 =~ /^HB/){ $S3_AB[$res1] = 1; } } } elsif ($res2 - $res1 == 4){ if ($name1 =~ /^HA/){ if ($name2 eq 'HN' or ($seq[$res2] eq 'P' and $name2 =~ /^HD/)){ $S4_AN[$res1] = 1; } } } } } } } } sub reports::protein::restraints::print_ca_rcoil_diff{ my $self = shift; my $y = $self->{'cursorY'}; my $mm_per_ppm = $self->{'mm_per_ppm'}; my $lh = $self->{'line_height'}; my $lw = $self->{'label_width'}; my $cw = $self->{'cell_width'}; my $x = $self->{'margin_left'}; my $sh = $self->{'step_height'}; my $ps = $self->{'ps'}; my $seq = $self->{sequence}; my $ass = $self->{ass}; #get random coil data my %data = bio::protein::aminoacid::atom::shifts(); my @CAdiff = util::array::init(scalar(@$seq),undef); foreach my $a (@$ass){ if ($a->{atom} eq 'CA'){ my $res = $a->{'residue'}; my $shift = $a->{'shift'}; $res =~ /^([A-Z])(\d+)$/; my $resname = $1; my $resno = $2; my $rcoil = $data{$resname}{'CA'}{'rcoil'}; $CAdiff[$resno-1] = $shift - $rcoil; } } my $deltaY = util::array::max([$lh,$mm_per_ppm * util::array::max(\@CAdiff) + 2*$sh]); $y = $y - $deltaY; $ps->text($x,$y,'dCA'); $x += $lw; foreach my $dCA (@CAdiff){ $ps->box({filled=>1},$x,$y,$x+$cw,$y+$dCA*$mm_per_ppm); $x += $cw; } } sub reports::protein::restraints::print_hnha{ my $self = shift; my $ps = $self->{ps}; my $lw = $self->{label_width}; my $w = $self->{line_width}; my $cw = $self->{cell_width}; my $lh = $self->{line_height}; my $x = $self->{margin_left}; my $y = $self->{'cursorY'}; my $hnha = $self->{'hnha'}; my $seq = $self->{'sequence'}; $ps->text($x,$y,'JHNHa'); $x += $lw; my @out = util::array::init($seq->[0],undef); foreach my $r (@$hnha){ my $aa = $r->{selections}[0]; my $resid = $aa->[0][0][0]; my $j = $r->{coupling}; my $cx = $x + ($resid - 1)*$cw; my $r = $cw/3; if ($j < 6){ $ps->circle({filled=>1},$cx+$cw/2,$y+$r,$r); } elsif ($j >=6 and $j<=8){ $ps->text({align=>'centre'},$cx+$cw/2,$y,'x'); } elsif ($j > 8){ $ps->circle({filled=>0},$cx+$cw/2,$y+$r,$r-$w/2); } } } sub reports::protein::restraints::print_sequence{ my $self = shift; my @seq = @{$self->{sequence}}; my $ps = $self->{ps}; my $lw = $self->{label_width}; my $cw = $self->{cell_width}; my $x = $self->{margin_left}; my $y = $self->{'cursorY'}; $ps->text($x,$y,'sequence'); $x += $lw; shift @seq; foreach my $aa (@seq){ $ps->text({align=>'centre'},$x+$cw/2,$y,$aa); $x += $cw; } } sub reports::protein::restraints::print_sequentials{ my ($self,$label,$data) = @_; my $lw = $self->{label_width}; my $x = $self->{margin_left}; my $cw = $self->{cell_width}; my $lh = $self->{line_height}; my $y = $self->{cursorY}; my $ps = $self->{ps}; my $sh = $self->{step_height}; $ps->text($x,$y,$label); $x += $lw; shift @$data; foreach my $upl (@$data){ my $height = 0; if ($upl <= 3){ $height = 2*$sh; } elsif ($upl < 10){ $height = $sh; } if ($height > 0){ $ps->box({'filled'=>1},$x,$y,$x+$cw,$y+$height); } $x += $cw; } } sub reports::protein::restraints::move_cursorY{ my ($self,$delta) = @_; if (not defined $delta){ $delta = $self->{line_height}; } $self->{cursorY} -= $delta; return; } sub reports::protein::restraints::print_ladder{ my ($self,$lbl,$input,$depth) = @_; shift @$input; my $N = scalar(@$input); my @ladder = util::array::init($N,undef); my $clvl = 0; my $plvl = undef; my $prev = -$depth - 1; for (my $i=0; $i < $N - $depth; $i++){ if ($i-$prev == $depth){ if ($clvl > 1){ $clvl = 0; } } elsif ($i-$prev > $depth){ $clvl = 0; } if ($input->[$i] == 1){ $ladder[$i] = $clvl; $prev = $i; $plvl = $clvl; $clvl++; } } my $ns = util::array::max(\@ladder) + 1; my $ps = $self->{ps}; my $x = $self->{margin_left}; my $cw = $self->{cell_width}; my $lh = $self->{line_height}; my $lw = $self->{label_width}; my $cl = $self->{cline}; my $top = $self->{top}; my $sh = $self->{step_height}; my $deltaY = util::array::max([$lh,($ns+2)*$sh]); my $y = $self->{cursorY} - $deltaY; $ps->text($x,$y,$lbl); $x += $lw; for (my $i=0; $ibox({filled=>1},$cx,$cy,$cx+$cw*$depth,$cy+$sh); } } $self->move_cursorY($deltaY); } sub reports::protein::restraints::print_intermediates{ my $self = shift; $self->print_ladder('aN(i+3)',$self->{AN3},3); $self->print_ladder('ab(i+3)',$self->{AB3},3); $self->print_ladder('aN(i+2)',$self->{AN2},2); $self->print_ladder('NN(i+2)',$self->{NN2},2); $self->print_ladder('aN(i+4)',$self->{AN4},4); } # ============================================================ # XML:: # ____ # | _ \ _ _ _ __ ___ _ __ ___ _ __ # | | | | | | | '_ ` _ \| '_ \ / _ \ '__| # | |_| | |_| | | | | | | |_) | __/ | # |____/ \__,_|_| |_| |_| .__/ \___|_| # |_| # Perl module for dumping Perl objects from/to XML # ============================================================ =head1 NAME XML::Dumper - Perl module for dumping Perl objects from/to XML =head1 SYNOPSIS # ===== OO-way use XML::Dumper; $dump = new XML::Dumper; $xml = $dump->pl2xml( $perl ); $perl = $dump->xml2pl( $xml ); $dump->pl2xml( $perl, "my_perl_data.xml.gz" ); # ===== Functional way use XML::Dumper; $xml = pl2xml( $perl ); $perl = xml2pl( $xml ); =head1 EXTENDED SYNOPSIS use XML::Dumper; my $dump = new XML::Dumper; my $perl = ''; my $xml = ''; # ===== Convert Perl code to XML $perl = [ { fname => 'Fred', lname => 'Flintstone', residence => 'Bedrock' }, { fname => 'Barney', lname => 'Rubble', residence => 'Bedrock' } ]; $xml = $dump->pl2xml( $perl ); # ===== Dump to a file my $file = "dump.xml"; $dump->pl2xml( $perl, $file ); # ===== Convert XML to Perl code $xml = q| Fred Flintstone Bedrock Barney Rubble Bedrock |; my $perl = $dump->xml2pl( $xml ); # ===== Convert an XML file to Perl code my $perl = $dump->xml2pl( $file ); # ===== And serialize Perl code to an XML file $dump->pl2xml( $perl, $file ); # ===== USE COMPRESSION $dump->pl2xml( $perl, $file.".gz" ); # ===== INCLUDE AN IN-DOCUMENT DTD $dump->dtd; my $xml_with_dtd = $dump->pl2xml( $perl ); # ===== USE EXTERNAL DTD $dump->dtd( $file, $url ); my $xml_with_link_to_dtd = $dump->pl2xml( $perl ); =head1 DESCRIPTION XML::Dumper dumps Perl data to XML format. XML::Dumper can also read XML data that was previously dumped by the module and convert it back to Perl. You can use the module read the XML from a file and write the XML to a file. Perl objects are blessed back to their original packaging; if the modules are installed on the system where the perl objects are reconstituted from xml, they will behave as expected. Intuitively, if the perl objects are converted and reconstituted in the same environment, all should be well. And it is. Additionally, because XML benefits so nicely from compression, XML::Dumper understands gzipped XML files. It does so with an optional dependency on Compress::Zlib. So, if you dump a Perl variable with a file that has an extension of '.xml.gz', it will store and compress the file in gzipped format. Likewise, if you read a file with the extension '.xml.gz', it will uncompress the file in memory before parsing the XML back into a Perl variable. Another fine challenge that this module rises to meet is that it understands circular definitions and multiple references to a single object. This includes doubly-linked lists, circular references, and the so-called 'Flyweight' pattern of Object Oriented programming. So it can take the gnarliest of your perl data, and should do just fine. =head2 FUNCTIONS AND METHODS =over 4 =cut package XML::Dumper; require 5.005_62; use strict; use warnings; require Exporter; use XML::Parser; our @ISA = qw( Exporter ); our %EXPORT_TAGS = ( ); our @EXPORT_OK = ( ); our @EXPORT = qw( xml2pl pl2xml xml_compare xml_identity ); our $VERSION = '0.71'; our $COMPRESSION_AVAILABLE; BEGIN { eval { require Compress::Zlib; }; if( $@ ) { $COMPRESSION_AVAILABLE = 0; } else { $COMPRESSION_AVAILABLE = 1; } } our $dump = new XML::Dumper; # ============================================================ sub new { # ============================================================ =item * new() - XML::Dumper constructor. Creates a lean, mean, XML dumping machine. It's also completely at your disposal. =cut # ------------------------------------------------------------ my ($class) = map { ref || $_ } shift; my $self = bless {}, $class; $self->init; return $self; } # ============================================================ sub init { # ============================================================ my $self = shift; $self->{ perldata } = {}; $self->{ xml } = {}; 1; } # ============================================================ sub dtd { # ============================================================ =item * dtd - Generates a Document Type Dictionary for the 'perldata' data type. The default behaviour is to embed the DTD in the XML, thereby creating valid XML. Given a filename, the DTD will be written out to that file and the XML document for your Perl data will link to the file. Given a filename and an URL, the DTD will be written out the file and the XML document will link to the URL. XML::Dumper doesn't try really hard to determine where your DTD's ought to go or relative paths or anything, so be careful with what arguments you supply this method, or just go with the default with the embedded DTD. Between DTD's and Schemas, the potential for more free-form data to be imported and exported becomes feasible. Usage: dtd(); # Causes XML to include embedded DTD dtd( $file ); # DTD saved to $file; XML will link to $file dtd( $file, $url ); # DTD saved to $file; XML will link to $url dtd( 0 ); # Prevents XML from including embedded DTD =cut # ------------------------------------------------------------ my $self = ( ref $_[0] && (ref $_[0]) =~ /XML::Dumper/ ) ? shift : $dump; my $file = shift; my $url = shift; my $dtd = qq{ }; if( defined $file && $file ) { open DTD, ">$file" or die $!; print DTD $dtd; close DTD; $url = defined $url ? $url : $file; $self->{ dtd } = qq{ }; } elsif( not defined $file ) { $self->{ dtd } = join( "\n", "", "', ''); } else { delete $self->{ dtd }; return; } $self->{ dtd }; } # ============================================================ sub dump { # ============================================================ my $self = shift; my $ref = shift; my $indent = shift; my $string = ''; # ===== REFERENCES if( ref $ref ) { no warnings; local $_ = ref( $ref ); my $class = ''; my $address = ''; my $reused = ''; PERL_TYPE: { # ---------------------------------------- OBJECT: { # ---------------------------------------- last OBJECT if /^(?:SCALAR|HASH|ARRAY)$/; $class = $_; $class = "e_xml_chars( $class ); ($_,$address) = scalar( $ref ) =~ /$class=([^(]+)$([x0-9A-Fa-f]+)$/; } # ---------------------------------------- MEMORY_ADDRESS: { # ---------------------------------------- last MEMORY_ADDRESS if( $class ); ($_,$address) = scalar( $ref ) =~ /([^(]+)$([x0-9A-Fa-f]+)$/; } $reused = exists( $self->{ xml }{ $address } ); # ---------------------------------------- if( /^SCALAR$/ ) { # ---------------------------------------- my $type = ""; $self->{ xml }{ $address }++ if( $address ); $string = "\n" . " " x $indent . $type . ($reused ? '' : "e_xml_chars($$ref)) . ""; last PERL_TYPE; } # ---------------------------------------- if( /^HASH$/ ) { # ---------------------------------------- $self->{ xml }{ $address }++ if( $address ); my $type = "{ xml }{ $address } ? " memory_address=\"$address\"" : '' ). ">"; $string = "\n" . " " x $indent . $type; if( not $reused ) { $indent++; foreach my $key (sort keys(%$ref)) { my $type = "{ $key } ? '' : " defined=\"false\"" ) . ">"; $string .= "\n" . " " x $indent . $type; if (ref($ref->{$key})) { $string .= $self->dump( $ref->{$key}, $indent+1); $string .= "\n" . " " x $indent . ""; } else { $string .= "e_xml_chars($ref->{$key}) . ""; } } $indent--; } $string .= "\n" . " " x $indent . ""; last PERL_TYPE; } # ---------------------------------------- if( /^ARRAY$/ ) { # ---------------------------------------- my $type = ""; $string .= "\n" . " " x $indent . $type; $self->{ xml }{ $address }++ if( $address ); if( not $reused ) { $indent++; for (my $i=0; $i < @$ref; $i++) { my $defined; my $type = "[ $i ] ? '' : " defined=\"false\"" ) . ">"; $string .= "\n" . " " x $indent . $type; if (ref($ref->[$i])) { $string .= $self->dump($ref->[$i], $indent+1); $string .= "\n" . " " x $indent . ""; } else { $string .= "e_xml_chars($ref->[$i]) . ""; } } $indent--; } $string .= "\n" . " " x $indent . ""; last PERL_TYPE; } } # ===== SCALAR } else { my $type = ""; $string .= "\n" . " " x $indent . $type . "e_xml_chars( $ref ) . ""; } return( $string ); } # ============================================================ sub perl2xml { # ============================================================ pl2xml( @_ ); } # ============================================================ sub pl2xml { # ============================================================ =item * pl2xml( $xml, [ $file ] ) - (Also perl2xml(), for those who enjoy readability over brevity). Converts Perl data to XML. If a second argument is given, then the Perl data will be stored to disk as XML, using the second argument as a filename. Usage: See Synopsis =cut # ------------------------------------------------------------ my $self = ( ref $_[0] && (ref $_[0]) =~ /XML::Dumper/ ) ? shift : $dump; my $ref = shift; my $file = shift; $self->init; my $xml = ( defined $self->{ dtd } ? $self->{ dtd } : '' ) . "" . $self->dump( $ref, 1 ) . "\n\n"; if( defined $file ) { if( $file =~ /\.xml\.gz$/i ) { if( $COMPRESSION_AVAILABLE ) { my $compressed_xml = Compress::Zlib::memGzip( $xml ) or die "Failed to compress xml $!"; open FILE, ">:utf8", $file or die "Can't open '$file' for writing $!"; binmode FILE; print FILE $compressed_xml; close FILE; } else { my $uncompressed_file = $file; $uncompressed_file =~ s/\.gz$//i; warn "Compress::Zlib not installed. Saving '$file' as '$uncompressed_file'\n"; open FILE, ">:utf8", $uncompressed_file or die "Can't open '$uncompressed_file' for writing $!"; print FILE $xml; close FILE; } } else { no warnings; # to shut Perl up about Wide characters for UTF8 output open FILE, ">$file" or die "Can't open '$file' for writing $!"; print FILE $xml; close FILE; } } return $xml; } # ============================================================ sub undump { # ============================================================ # undump # Takes the XML generated by pl2xml, and recursively undumps it to # create a data structure in memory. The top-level object is a scalar, # a reference to a scalar, a hash, or an array. Hashes and arrays may # themselves contain scalars, or references to scalars, or references to # hashes or arrays, with the exception that scalar values are never # "undef" because there's currently no way to represent undef in the # dumped data. # # The key to understanding undump is to understand XML::Parser's # Tree parsing format: # # , [ { , <[children tag-array pair value(s)]...> ] # ------------------------------------------------------------ my $self = shift; my $tree = shift; my $callback = shift; my $ref = undef; my $item; # make Perl stop whining about deep recursion and soft references no warnings; TREE: for (my $i = 1; $i < $#$tree; $i+=2) { no warnings; local $_ = lc( $tree->[ $i ] ); my $class = ''; my $address = ''; PERL_TYPES: { # ---------------------------------------- if( /^scalar$/ ) { # ---------------------------------------- $ref = defined $tree->[ $i+1 ][ 2 ] ? $tree->[ $i +1 ][ 2 ] : ''; if( exists $tree->[ $i+1 ][ 0 ]{ 'defined' } ) { if( $tree->[ $i +1 ][ 0 ]{ 'defined' } =~ /false/i ) { $ref = undef; } } last TREE; } # ===== FIND PACKAGE if( $tree->[ $i+1 ] && ref( $tree->[ $i +1 ] ) eq 'ARRAY' ) { if( exists $tree->[ $i+1 ][0]{ blessed_package } ) { $class = $tree->[ $i+1 ][ 0 ]{ blessed_package }; } } # ===== FIND MEMORY ADDRESS if( $tree->[ $i+1 ] && ref( $tree->[ $i +1 ] ) eq 'ARRAY' ) { if( exists $tree->[ $i+1 ][0]{ memory_address } ) { $address = $tree->[ $i+1 ][ 0 ]{ memory_address }; } } ALREADY_EXISTS_IN_MEMORY: { if( exists $self->{ perldata }{ $address } ) { $ref = $self->{ perldata }{ $address }; last TREE; } } # ---------------------------------------- if( /^scalarref/ ) { # ---------------------------------------- $ref = defined $tree->[ $i+1 ][ 2 ] ? \ $tree->[ $i +1 ][ 2 ] : \''; if( exists $tree->[ $i+1 ][ 0 ]{ 'defined' } ) { if( $tree->[ $i +1 ][ 0 ]{ 'defined' } =~ /false/i ) { $ref = \ undef; } } $self->{ perldata }{ $address } = $ref if( $address ); if( $class ) { unless( int( eval( "\%$class"."::")) ) { eval { require $class; }; if( $@ ) { warn $@; } } bless $ref, $class; if( defined $callback && $ref->can( $callback ) ) { $ref->$callback(); } } last TREE; } # ---------------------------------------- if( /^hash(?:ref)?/ ) { # ---------------------------------------- $ref = {}; $self->{ perldata }{ $address } = $ref if( $address ); for (my $j = 1; $j < $#{$tree->[$i+1]}; $j+=2) { next unless $tree->[$i+1][$j] eq 'item'; my $item_tree = $tree->[$i+1][$j+1]; if( exists $item_tree->[0]{ key } ) { my $key = $item_tree->[ 0 ]{ key }; if( exists $item_tree->[ 0 ]{ 'defined' } ) { if( $item_tree->[ 0 ]{ 'defined' } =~ /false/ ) { $ref->{ $key } = undef; next; } } # ===== XML::PARSER IGNORES ZERO-LENGTH STRINGS # It indicates the presence of a zero-length string by # not having the array portion of the tag-name/array pair # values be of length 1. (Which is to say it captures only # the attributes of the tag and acknowledges that the tag # is an empty one. if( int( @{ $item_tree } ) == 1 ) { $ref->{ $key } = ''; next; } $ref->{ $key } = $self->undump( $item_tree, $callback ); } } if( $class ) { unless( int( eval( "\%$class"."::")) ) { eval { require $class; }; if( $@ ) { warn $@; } } bless $ref, $class; if( defined $callback && $ref->can( $callback ) ) { $ref->$callback(); } } last TREE; } # ---------------------------------------- if( /^arrayref/ ) { # ---------------------------------------- $ref = []; $self->{ perldata }{ $address } = $ref if( $address ); for (my $j = 1; $j < $#{$tree->[$i+1]}; $j+=2) { next unless $tree->[$i+1][$j] eq 'item'; my $item_tree = $tree->[$i+1][$j+1]; if( exists $item_tree->[0]{ key } ) { my $key = $item_tree->[0]{ key }; if( exists $item_tree->[ 0 ]{ 'defined' } ) { if( $item_tree->[ 0 ]{ 'defined' } =~ /false/ ) { $ref->[ $key ] = undef; next; } } # ===== XML::PARSER IGNORES ZERO-LENGTH STRINGS # See note above. if( int( @{ $item_tree } ) == 1 ) { $ref->[ $key ] = ''; next; } $ref->[ $key ] = $self->undump( $item_tree, $callback ); } } if( $class ) { unless( int( eval( "\%$class"."::")) ) { eval { require $class; }; if( $@ ) { warn $@; } } bless $ref, $class; if( defined $callback && $ref->can( $callback ) ) { $ref->$callback(); } } last TREE; } # ---------------------------------------- if( /^0$/ ) { # SIMPLE SCALAR # ---------------------------------------- $item = $tree->[$i + 1]; } } } ## If $ref is not set at this point, it means we've just ## encountered a scalar value directly inside the item tag. $ref = $item unless defined( $ref ); return ($ref); } # ============================================================ sub quote_xml_chars { # ============================================================ local $_ = shift; return $_ if not defined $_; s/&/&/g; s//>/g; s/'/'/g; s/"/"/g; return $_; } # ============================================================ sub xml2perl { # ============================================================ xml2pl( @_ ); } # ============================================================ sub xml2pl { # ============================================================ =item * xml2pl( $xml_or_filename, [ $callback ] ) - (Also xml2perl(), for those who enjoy readability over brevity.) Converts XML to a Perl datatype. If this method is given a second argument, XML::Dumper will use the second argument as a callback (if possible). If the first argument isn't XML and exists as a file, that file will be read and its contents will be used as the input XML. Currently, the only supported invocation of callbacks is through soft references. That is to say, the callback argument ought to be a string that matches the name of a callable method for your classes. If you have a congruent interface, this should work like a peach. If your class interface doesn't have such a named method, it won't be called. =cut # ------------------------------------------------------------ my $self = ( ref $_[0] && (ref $_[0]) =~ /XML::Dumper/) ? shift : $dump; my $xml = shift; my $callback = shift; $self->init; if( $xml !~ /\gzread( $buffer ) > 0 ) { push @xml, $buffer; } $gz->gzclose(); $xml = join "", @xml; } else { die "Compress::Zlib is not installed. Cannot read gzipped file '$file'"; } } else { open FILE, $file or die "Can't open file '$file' for reading $!"; my @xml = ; close FILE; $xml = join "", @xml; } } else { die "'$file' does not exist as a file and is not XML.\n"; } } my $parser = new XML::Parser(Style => 'Tree'); my $tree = $parser->parse($xml); # Skip enclosing "perldata" level my $topItem = $tree->[1]; my $ref = $self->undump($topItem, $callback); return($ref); } # ============================================================ sub xml_compare { # ============================================================ =item * xml_compare( $xml1, $xml2 ) - Compares xml for content Compares two dumped Perl data structures (that is, compares the xml) for identity in content. Use this function rather than perl's built-in string comparison, especially when dealing with perl data that is memory-location dependent (which pretty much means all references). This function will return true for any two perl data that are either deep clones of each other, or identical. This method is exported by default. =cut # ------------------------------------------------------------ my $xml1 = shift; my $xml2 = shift; $xml1 =~ s/(<[^>]*)\smemory_address="\dx[A-Za-z0-9]+"([^<]*>)/$1$2/g; $xml2 =~ s/(<[^>]*)\smemory_address="\dx[A-Za-z0-9]+"([^<]*>)/$1$2/g; $xml1 =~ s/(<[^>]*)\sdefined=\"false\"([^<]>)/$1$2/g; # For backwards $xml2 =~ s/(<[^>]*)\sdefined=\"false\"([^<]>)/$1$2/g; # compatibility $xml1 =~ s/<\?xml .*>//; # Ignore XML declaration $xml2 =~ s/<\?xml .*>//; $xml1 =~ s/<\!DOCTYPE perldata \[.*\]>//s; # Remove DTD $xml2 =~ s/<\!DOCTYPE perldata \[.*\]>//s; $xml1 =~ s/^\s*\s*\s*\s*$/>/; $xml2 =~ s/>\s*$/>/; return $xml1 eq $xml2; } # ============================================================ sub xml_identity { # ============================================================ =item * xml_identity( $xml1, $xml2 ) - Compares xml for identity Compares two dumped Perl data structures (that is, compares the xml) for identity in instantiation. This function will return true for any two perl data that are identical, but not for deep clones of each other. This method is also exported by default. =cut # ------------------------------------------------------------ my $xml1 = shift; my $xml2 = shift; return ( $xml1 eq $xml2 ); } =back =head1 EXPORTS By default, the following methods are exported: xml2pl, pl2xml, xml_compare, xml_identity =head1 BUGS AND DEPENDENCIES XML::Dumper has changed API since 0.4, as a response to a bug report from PerlMonks. I felt it was necessary, as the functions simply didn't work as advertised. That is, xml2pl really didnt accept xml as an argument; what it wanted was an XML Parse tree. To correct for the API change, simply don't parse the XML before feeding it to XML::Dumper. XML::Dumper also has no understanding of typeglobs (references or not), references to regular expressions, or references to Perl subroutines. Turns out that Data::Dumper doesn't do references to Perl subroutines, either, so at least I'm in somewhat good company. XML::Dumper requires one perl module, available from CPAN XML::Parser XML::Parser itself relies on Clark Cooper's Expat implementation in Perl, which in turn requires James Clark's expat package itself. See the documentation for XML::Parser for more information. =head1 REVISIONS AND CREDITS The list of credits got so long that I had to move it to the Changes file. Thanks to all those who've contributed with bug reports and suggested features! Keep 'em coming! I've had ownership of the module since June of 2002, and very much appreciate requests on how to make the module better. It has served me well, both as a learning tool on how I can repay my debt to the Perl Community, and as a practical module that is useful. I'm thrilled to be able to offer this bit of code. So, if you have suggestions, bug reports, or feature requests, please let me know and I'll do my best to make this a better module. =head1 CURRENT MAINTAINER Mike Wong Emike_w3@pacbell.netE XML::Dumper is free software. You can redistribute it and/or modify it under the same terms as Perl itself. =head1 ORIGINAL AUTHOR Jonathan Eisenzopf Eeisen@pobox.comE =head1 SEE ALSO perl(1) Compress::Zlib(3) XML::Parser(3) Data::DumpXML(3) =cut package PostScript::Simple; use vars qw($VERSION @ISA @EXPORT); use Carp; use Exporter; @ISA = qw(Exporter); @EXPORT = qw(); =head1 NAME PostScript::Simple - Produce PostScript files from Perl =head1 SYNOPSIS use PostScript::Simple; # create a new PostScript object $p = new PostScript::Simple(papersize => "A4", colour => 1, eps => 0, units => "in"); # create a new page $p->newpage; # draw some lines and other shapes $p->line(1,1, 1,4); $p->linextend(2,4); $p->box(1.5,1, 2,3.5); $p->circle(2,2, 1); $p->setlinewidth( 0.01 ); $p->curve(1,5, 1,7, 3,7, 3,5); $p->curvextend(3,3, 5,3, 5,5); # draw a rotated polygon in a different colour $p->setcolour(0,100,200); $p->polygon({rotate=>45}, 1,1, 1,2, 2,2, 2,1, 1,1); # add some text in red $p->setcolour("red"); $p->setfont("Times-Roman", 20); $p->text(1,1, "Hello"); # write the output to a file $p->output("file.ps"); =head1 DESCRIPTION PostScript::Simple allows you to have a simple method of writing PostScript files from Perl. It has graphics primitives that allow lines, curves, circles, polygons and boxes to be drawn. Text can be added to the page using standard PostScript fonts. The images can be single page EPS files, or multipage PostScript files. The image size can be set by using a recognised paper size ("C", for example) or by giving dimensions. The units used can be specified ("C" or "C", etc) and are the same as those used in TeX. The default unit is a bp, or a PostScript point, unlike TeX. =head1 PREREQUISITES This module requires C and C. =head2 EXPORT None. =cut =head1 CONSTRUCTOR =over 4 =item C Create a new PostScript::Simple object. The different options that can be set are: =over 4 =item units Units that are to be used in the file. Common units would be C, C, C, C, and C. Others are as used in TeX. (Default: C) =item xsize Specifies the width of the drawing area in units. =item ysize Specifies the height of the drawing area in units. =item papersize The size of paper to use, if C or C are not defined. This allows a document to easily be created using a standard paper size without having to remember the size of paper using PostScript points. Valid choices are currently "C", "C", "C", and "C". =item landscape Use the landscape option to rotate the page by 90 degrees. The paper dimensions are also rotated, so that clipping will still work. (Note that the printer will still think that the paper is portrait.) (Default: 0) =item copies Set the number of copies that should be printed. (Default: 1) =item clip If set to 1, the image will be clipped to the xsize and ysize. This is most useful for an EPS image. (Default: 0) =item colour Specifies whether the image should be rendered in colour or not. If set to 0 (default) all requests for a colour are mapped to a greyscale. Otherwise the colour requested with C or C is used. This option is present because most modern laser printers are only black and white. (Default: 0) =item eps Generate an EPS file, rather than a standard PostScript file. If set to 1, no newpage methods will actually create a new page. This option is probably the most useful for generating images to be imported into other applications, such as TeX. (Default: 1) =item page Specifies the initial page number of the (multi page) document. The page number is set with the Adobe DSC comments, and is used nowhere else. It only makes finding your pages easier. See also the C method. (Default: 1) =item coordorigin Defines the co-ordinate origin for each page produced. Valid arguments are C, C, C and C. The default is C. =item direction The direction the co-ordinates go from the origin. Values can be C, C, C and C. The default value is C. =item reencode Requests that a font re-encode function be added and that the 13 standard PostScript fonts get re-encoded in the specified encoding. The most popular choice (other than undef) is 'ISOLatin1Encoding' which selects the iso8859-1 encoding and fits most of western Europe, including the Scandinavia. Refer to Adobes Postscript documentation for other encodings. The output file is, by default, re-encoded to ISOLatin1Encoding. To stop this happening, use 'reencode => undef'. To use the re-encoded font, '-iso' must be appended to the names of the fonts used, e.g. 'Helvetica-iso'. =back Example: $ref = new PostScript::Simple(landscape => 1, eps => 0, xsize => 4, ysize => 3, units => "in"); Create a document that is 4 by 3 inches and prints landscape on a page. It is not an EPS file, and must therefore use the C method. $ref = new PostScript::Simple(eps => 1, colour => 1, xsize => 12, ysize => 12, units => "cm", reencode => "ISOLatin1Encoding"); Create a 12 by 12 cm EPS image that is in colour. Note that "C 1>" did not have to be specified because this is the default. Re-encode the standard fonts into the iso8859-1 encoding, providing all the special characters used in Western Europe. The C method should not be used. =back =cut sub new# {{{ { my ($class, %data) = @_; # is there another colour database that can be used instead of defining # this one here? what about the X-windows one? (apart from MS-Win-probs?) XXXXX my %pscolours = (# {{{ black => "0 0 0", brightred => "1 0 0", brightgreen => "0 1 0", brightblue => "0 0 1", red => "0.8 0 0", green => "0 0.8 0", blue => "0 0 0.8", darkred => "0.5 0 0", darkgreen => "0 0.5 0", darkblue => "0 0 0.5", grey10 => "0.1 0.1 0.1", grey20 => "0.2 0.2 0.2", grey30 => "0.3 0.3 0.3", grey40 => "0.4 0.4 0.4", grey50 => "0.5 0.5 0.5", grey60 => "0.6 0.6 0.6", grey70 => "0.7 0.7 0.7", grey80 => "0.8 0.8 0.8", grey90 => "0.9 0.9 0.9", white => "1 1 1", );# }}} # define page sizes here (a4, letter, etc) # should be Properly Cased my %pspaper = (# {{{ A0 => '2384 3370', A1 => '1684 2384', A2 => '1191 1684', A3 => "841.88976 1190.5512", A4 => "595.27559 841.88976", A5 => "420.94488 595.27559", A6 => '297 420', A7 => '210 297', A8 => '148 210', A9 => '105 148', B0 => '2920 4127', B1 => '2064 2920', B2 => '1460 2064', B3 => '1032 1460', B4 => '729 1032', B5 => '516 729', B6 => '363 516', B7 => '258 363', B8 => '181 258', B9 => '127 181 ', B10 => '91 127', Executive => '522 756', Folio => '595 935', 'Half-Letter' => '612 397', Letter => "612 792", 'US-Letter' => '612 792', Legal => '612 1008', 'US-Legal' => '612 1008', Tabloid => '792 1224', 'SuperB' => '843 1227', Ledger => '1224 792', 'Comm #10 Envelope' => '297 684', 'Envelope-Monarch' => '280 542', 'Envelope-DL' => '312 624', 'Envelope-C5' => '461 648', 'EuroPostcard' => '298 420', );# }}} # The 13 standard fonts that are available on all PS 1 implementations: my @fonts = (# {{{ 'Courier', 'Courier-Bold', 'Courier-BoldOblique', 'Courier-Oblique', 'Helvetica', 'Helvetica-Bold', 'Helvetica-BoldOblique', 'Helvetica-Oblique', 'Times-Roman', 'Times-Bold', 'Times-BoldItalic', 'Times-Italic', 'Symbol');# }}} # define the origins for the page a document can have # (default is "LeftBottom") my %psorigin = (# {{{ 'LeftBottom' => '0 0', 'LeftTop' => '0 -1', 'RightBottom' => '-1 0', 'RightTop' => '-1 -1', );# }}} # define the co-ordinate direction (default is 'RightUp') my %psdirs = (# {{{ 'RightUp' => '1 1', 'RightDown' => '1 -1', 'LeftUp' => '-1 1', 'LeftDown' => '-1 -1', );# }}} # measuring units are two-letter acronyms as used in TeX: # bp: postscript point (72 per inch) # in: inch (72 postscript points) # pt: printer's point (72.27 per inch) # mm: millimetre (25.4 per inch) # cm: centimetre (2.54 per inch) # pi: pica (12 printer's points) # dd: didot point (67.567. per inch) # cc: cicero (12 didot points) # set up the others here (sp) XXXXX my %psunits = (# {{{ pt => "72 72.27", pc => "72 6.0225", in => "72 1", bp => "1 1", cm => "72 2.54", mm => "72 25.4", dd => "72 67.567", cc => "72 810.804", );# }}} my $self = { xsize => undef, ysize => undef, papersize => undef, units => "bp", # measuring units (see below) landscape => 0, # rotate the page 90 degrees copies => 1, # number of copies colour => 0, # use colour clip => 0, # clip to the bounding box eps => 1, # create eps file page => 1, # page number to start at reencode => "ISOLatin1Encoding", # Re-encode the 13 standard # fonts in this encoding bbx1 => 0, # Bounding Box definitions bby1 => 0, bbx2 => 0, bby2 => 0, pscomments => "", # the following entries store data psprolog => "", # for the same DSC areas of the psfunctions => "", # postscript file. pssetup => "", pspages => "", pstrailer => "", lastfontsize => 0, pspagecount => 0, usedcircle => 0, usedcircletext => 0, usedbox => 0, usedrotabout => 0, usedimporteps => 0, coordorigin => 'LeftBottom', direction => 'RightUp', psdirs => \%psdirs, psunits => \%psunits, pspaper => \%pspaper, pscolours => \%pscolours, psorigin => \%psorigin, fonts => \@fonts, version => '0.07', }; foreach (keys %data) { $self->{$_} = $data{$_}; } bless $self, $class; $self->init(); return $self; }# }}} sub init# {{{ { my $self = shift; my ($m, $d) = (1, 1); my ($u, $mm); my ($dx, $dy); # Units# {{{ if (defined $self->{units}) { $self->{units} = lc $self->{units}; } if (defined($self->{psunits}{$self->{units}})) { ($m, $d) = split(/\s+/, $self->{psunits}{$self->{units}}); } else { $self->_error( "unit '$self->{units}' undefined" ); } ($dx, $dy) = split(/\s+/, $self->{psdirs}{$self->{direction}}); # X direction $mm = $m * $dx; $u = "{"; if ($mm != 1) { $u .= "$mm mul " } if ($d != 1) { $u .= "$d div " } $u =~ s/ $//; $u .="}"; $self->{psfunctions} .= "/ux $u def\n"; # Y direction $mm = $m * $dy; $u = "{"; if ($mm != 1) { $u .= "$mm mul " } if ($d != 1) { $u .= "$d div " } $u =~ s/ $//; $u .="}"; $self->{psfunctions} .= "/uy $u def\n"; # General unit scale (circle radius, etc) $u = "{"; if ($m != 1) { $u .= "$m mul " } if ($d != 1) { $u .= "$d div " } $u =~ s/ $//; $u .="}"; $self->{psfunctions} .= "/u $u def\n"; #$u = "{"; #if ($m != 1) { $u .= "$m mul " } #if ($d != 1) { $u .= "$d div " } #$u =~ s/ $//; #$u .="}"; # #$self->{psfunctions} .= "/u $u def\n";# }}} # Paper size# {{{ if (defined $self->{papersize}) { $self->{papersize} = ucfirst lc $self->{papersize}; } if (!defined $self->{xsize} || !defined $self->{ysize}) { if (defined $self->{papersize} && defined $self->{pspaper}{$self->{papersize}}) { ($self->{xsize}, $self->{ysize}) = split(/\s+/, $self->{pspaper}{$self->{papersize}}); $self->{bbx2} = int($self->{xsize}); $self->{bby2} = int($self->{ysize}); $self->{pscomments} .= "\%\%DocumentMedia: $self->{papersize} $self->{xsize} "; $self->{pscomments} .= "$self->{ysize} 0 ( ) ( )\n"; } else { ($self->{xsize}, $self->{ysize}) = (100,100); $self->_error( "page size undefined" ); } } else { $self->{bbx2} = int(($self->{xsize} * $m) / $d); $self->{bby2} = int(($self->{ysize} * $m) / $d); }# }}} if (!$self->{eps}) { $self->{pssetup} .= "ll 2 ge { << /PageSize [ $self->{xsize} " . "$self->{ysize} ] /ImagingBBox null >>" . " setpagedevice } if\n"; } # Landscape# {{{ if ($self->{landscape}) { my $swap; $self->{psfunctions} .= "/landscape { $self->{bbx2} 0 translate 90 rotate } bind def "; # I now think that Portrait is the correct thing here, as the page is # rotated. $self->{pscomments} .= "\%\%Orientation: Portrait\n"; # $self->{pscomments} .= "\%\%Orientation: Landscape\n"; $swap = $self->{bbx2}; $self->{bbx2} = $self->{bby2}; $self->{bby2} = $swap; # for EPS files, change to landscape here, as there are no pages if ($self->{eps}) { $self->{pssetup} .= "landscape\n" } } else { $self->{pscomments} .= "\%\%Orientation: Portrait\n"; }# }}} # Clipping# {{{ if ($self->{clip}) { $self->{psfunctions} .= "/pageclip {newpath $self->{bbx1} $self->{bby1} moveto $self->{bbx1} $self->{bby2} lineto $self->{bbx2} $self->{bby2} lineto $self->{bbx2} $self->{bby1} lineto $self->{bbx1} $self->{bby1} lineto closepath clip} bind def "; if ($self->{eps}) { $self->{pssetup} .= "pageclip\n" } }# }}} # Font reencoding# {{{ if ($self->{reencode}) { my $encoding; # The name of the encoding my $ext; # The extention to tack onto the std fontnames if (ref $self->{reencode} eq 'ARRAY') { die "Custom reencoding of fonts not really implemented yet, sorry..."; $encoding = shift @{$self->{reencode}}; $ext = shift @{$self->{reencode}}; # TODO: Do something to add the actual encoding to the postscript code. } else { $encoding = $self->{reencode}; $ext = '-iso'; } $self->{psfunctions} .= <<'EOP'; /STARTDIFFENC { mark } bind def /ENDDIFFENC { % /NewEnc BaseEnc STARTDIFFENC number or glyphname ... ENDDIFFENC - counttomark 2 add -1 roll 256 array copy /TempEncode exch def % pointer for sequential encodings /EncodePointer 0 def { % Get the bottom object counttomark -1 roll % Is it a mark? dup type dup /marktype eq { % End of encoding pop pop exit } { /nametype eq { % Insert the name at EncodePointer % and increment the pointer. TempEncode EncodePointer 3 -1 roll put /EncodePointer EncodePointer 1 add def } { % Set the EncodePointer to the number /EncodePointer exch def } ifelse } ifelse } loop TempEncode def } bind def % Define ISO Latin1 encoding if it doesnt exist /ISOLatin1Encoding where { % (ISOLatin1 exists!) = pop } { (ISOLatin1 does not exist, creating...) = /ISOLatin1Encoding StandardEncoding STARTDIFFENC 144 /dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent /dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space /exclamdown /cent /sterling /currency /yen /brokenbar /section /dieresis /copyright /ordfeminine /guillemotleft /logicalnot /hyphen /registered /macron /degree /plusminus /twosuperior /threesuperior /acute /mu /paragraph /periodcentered /cedilla /onesuperior /ordmasculine /guillemotright /onequarter /onehalf /threequarters /questiondown /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex /Idieresis /Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn /germandbls /agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla /egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex /idieresis /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide /oslash /ugrave /uacute /ucircumflex /udieresis /yacute /thorn /ydieresis ENDDIFFENC } ifelse % Name: Re-encode Font % Description: Creates a new font using the named encoding. /REENCODEFONT { % /Newfont NewEncoding /Oldfont findfont dup length 4 add dict begin { % forall 1 index /FID ne 2 index /UniqueID ne and 2 index /XUID ne and { def } { pop pop } ifelse } forall /Encoding exch def % defs for DPS /BitmapWidths false def /ExactSize 0 def /InBetweenSize 0 def /TransformedChar 0 def currentdict end definefont pop } bind def % Reencode the std fonts: EOP for my $font (@{$self->{fonts}}) { $self->{psfunctions} .= "/${font}$ext $encoding /$font REENCODEFONT\n"; } }# }}} }# }}} =head1 OBJECT METHODS All object methods return 1 for success or 0 in some error condition (e.g. insufficient arguments). Error message text is also drawn on the page. =over 4 =item C Generates a new page on a PostScript file. If specified, C gives the number (or name) of the page. This method should not be used for EPS files. The page number is automatically incremented each time this is called without a new page number, or decremented if the current page number is negative. Example: $p->newpage(1); $p->newpage; $p->newpage("hello"); $p->newpage(-6); $p->newpage; will generate five pages, numbered: 1, 2, "hello", -6, -7. =cut sub newpage# {{{ { my $self = shift; my $nextpage = shift; my ($x, $y); if (defined($nextpage)) { $self->{page} = $nextpage; } if ($self->{eps}) { # Cannot have multiple pages in an EPS file XXXXX $self->_error("Do not use newpage for eps files!"); return 0; } if ($self->{pspagecount} != 0) { $self->{pspages} .= "\%\%PageTrailer\npagelevel restore\nshowpage\n"; } $self->{pspagecount} ++; $self->{pspages} .= "\%\%Page: $self->{page} $self->{pspagecount}\n"; if ($self->{page} >= 0) { $self->{page} ++; } else { $self->{page} --; } $self->{pspages} .= "\%\%BeginPageSetup\n"; $self->{pspages} .= "/pagelevel save def\n"; if ($self->{landscape}) { $self->{pspages} .= "landscape\n" } if ($self->{clip}) { $self->{pspages} .= "pageclip\n" } ($x, $y) = split(/\s+/, $self->{psorigin}{$self->{coordorigin}}); $x = $self->{xsize} if ($x < 0); $y = $self->{ysize} if ($y < 0); $self->{pspages} .= "$x $y translate\n" if (($x != 0) || ($y != 0)); $self->{pspages} .= "\%\%EndPageSetup\n"; return 1; }# }}} =item C Writes the current PostScript out to the file named C. Will destroy any existing file of the same name. Use this method whenever output is required to disk. The current PostScript document in memory is not cleared, and can still be extended. =cut sub _builddocument# {{{ { my $self = shift; my $title = shift; my $page; my $date = scalar localtime; my $user; $title = 'undefined' unless $title; $page = []; # getlogin is unimplemented on some systems eval { $user = getlogin; }; $user = 'Console' unless $user; # Comments Section push @$page, "%!PS-Adobe-3.0"; push @$page, " EPSF-1.2" if ($self->{eps}); push @$page, "\n"; push @$page, "\%\%Title: ($title)\n"; push @$page, "\%\%LanguageLevel: 1\n"; my $version = $self->{'version'}; push @$page, "\%\%Creator: PostScript::Simple perl module version $version\n"; push @$page, "\%\%CreationDate: $date\n"; push @$page, "\%\%For: $user\n"; push @$page, \$self->{pscomments}; # push @$page, "\%\%DocumentFonts: \n"; if ($self->{eps}) { push @$page, "\%\%BoundingBox: $self->{bbx1} $self->{bby1} $self->{bbx2} $self->{bby2}\n"; } else { push @$page, "\%\%Pages: $self->{pspagecount}\n"; } push @$page, "\%\%EndComments\n"; # Prolog Section push @$page, "\%\%BeginProlog\n"; push @$page, "/ll 1 def systemdict /languagelevel known {\n"; push @$page, "/ll languagelevel def } if\n"; push @$page, \$self->{psprolog}; push @$page, "\%\%BeginResource: PostScript::Simple\n"; push @$page, \$self->{psfunctions}; push @$page, "\%\%EndResource\n"; push @$page, "\%\%EndProlog\n"; # Setup Section if (length($self->{pssetup}) || ($self->{copies} > 1)) { push @$page, "\%\%BeginSetup\n"; if ($self->{copies} > 1) { push @$page, "/#copies " . $self->{copies} . " def\n"; } push @$page, \$self->{pssetup}; push @$page, "\%\%EndSetup\n"; } # Pages push @$page, \$self->{pspages}; if ((!$self->{eps}) && ($self->{pspagecount} > 0)) { push @$page, "\%\%PageTrailer\n"; push @$page, "pagelevel restore\n"; push @$page, "showpage\n"; } # Trailer Section if (length($self->{pstrailer})) { push @$page, "\%\%Trailer\n"; push @$page, \$self->{pstrailer}; } push @$page, "\%\%EOF\n"; return $page; }# }}} sub output# {{{ { my $self = shift; my $file = shift || die("Must supply a filename for output"); my $page; my $i; $page = _builddocument($self, $file); local *OUT; open(OUT, '>'.$file) or die("Cannot write to file $file: $!"); foreach $i (@$page) { if (ref($i) eq "SCALAR") { print OUT $$i; } else { print OUT $i; } } close OUT; return 1; }# }}} =item C Returns the current document. Use this method whenever output is required as a scalar. The current PostScript document in memory is not cleared, and can still be extended. =cut sub get# {{{ { my $self = shift; my $page; my $i; my $doc; $page = _builddocument($self, "PostScript::Simple generated page"); $doc = ""; foreach $i (@$page) { if (ref($i) eq "SCALAR") { $doc .= $$i; } else { $doc .= $i; } } return $doc; }# }}} =item C Returns the current document as a PostScript::Simple::EPS object. Only works if the current document is EPS. This method calls new PostScript::Simple::EPS with all the default options. To change these, call it yourself as below, rather than using this method. $eps = new PostScript::Simple::EPS(source => $ps->get); =cut sub geteps# {{{ { my $self = shift; my $page; my $i; my $doc; my $eps; croak "document is not EPS" unless ($$self{eps} == 1); $eps = new PostScript::Simple::EPS(source => $self->get); return $eps; }# }}} =item C Sets the new drawing colour to the values specified in C, C and C. The values range from 0 to 255. Alternatively, a colour name may be specified. Those currently defined are listed at the top of the PostScript::Simple module in the C<%pscolours> hash. Example: # set new colour to brown $p->setcolour(200,100,0); # set new colour to black $p->setcolour("black"); =cut sub setcolour# {{{ { my $self = shift; my ($r, $g, $b) = @_; if ( @_ == 1 ) { $r = lc $r; if (defined $self->{pscolours}{$r}) { ($r, $g, $b) = split(/\s+/, $self->{pscolours}{$r}); } else { $self->_error( "bad colour name '$r'" ); return 0; } } elsif ( @_ == 3 ) { $r /= 255; $g /= 255; $b /= 255; } else { if (not defined $r) { $r = 'undef' } if (not defined $g) { $g = 'undef' } if (not defined $b) { $b = 'undef' } $self->_error( "setcolour given invalid arguments: $r, $g, $b" ); return 0; } if ($self->{colour}) { $self->{pspages} .= "$r $g $b setrgbcolor\n"; } else { $r = 0.3*$r + 0.59*$g + 0.11*$b; ##PKENT - better colour->grey conversion $self->{pspages} .= "$r setgray\n"; } return 1; }# }}} =item C Sets the new line width to C units. Example: # draw a line 10mm long and 4mm wide $p = new PostScript::Simple(units => "mm"); $p->setlinewidth(4); $p->line(10,10, 20,10); =cut sub setlinewidth# {{{ { my $self = shift; my $width = shift || do { $self->_error( "setlinewidth not given a width" ); return 0; }; # MCN should allow for option units=>"cm" on each setlinewidth / line / polygon etc ##PKENT - good idea, should we have names for line weights, like we do for colours? if ($width eq "thin") { $width = "0.4" } else { $width .= " u" } $self->{pspages} .= "$width setlinewidth\n"; return 1; }# }}} =item C Draws a line from the co-ordinates (x1,x2) to (x2,y2). If values are specified for C, C and C, then the colour is set before the line is drawn. Example: # set the colour to black $p->setcolour("black"); # draw a line in the current colour (black) $p->line(10,10, 10,20); # draw a line in red $p->line(20,10, 20,20, 255,0,0); # draw another line in red $p->line(30,10, 30,20); =cut sub line# {{{ { my $self = shift; my ($x1, $y1, $x2, $y2, $r, $g, $b) = @_; # dashed lines? XXXXX # MCN should allow for option units=>"cm" on each setlinewidth / line / polygon etc if ((!$self->{pspagecount}) and (!$self->{eps})) { # Cannot draw on to non-page when not an eps file XXXXX return 0; } if ( @_ == 7 ) { $self->setcolour($r, $g, $b); } elsif ( @_ != 4 ) { $self->_error( "wrong number of args for line" ); return 0; } $self->newpath; $self->moveto($x1, $y1); $self->{pspages} .= "$x2 ux $y2 uy lineto stroke\n"; return 1; }# }}} =item C Assuming the previous command was C, C, C or C, extend that line to include another segment to the co-ordinates (x,y). Behaviour after any other method is unspecified. Example: $p->line(10,10, 10,20); $p->linextend(20,20); $p->linextend(20,10); $p->linextend(10,10); Notes The C method may be more appropriate. =cut sub linextend# {{{ { my $self = shift; my ($x, $y) = @_; unless ( @_ == 2 ) { $self->_error( "wrong number of args for linextend" ); return 0; } $self->{pspages} =~ s/eto stroke\n$/eto\n$x ux $y uy lineto stroke\n/; ##PKENT comments: lineto can follow a curveto or a lineto, hence the change in regexp ##also I thought that it'd be better to change the '.*$' in the regexp with '\n$' - perhaps ##we need something like $self->{_lastcommand} to know if operations are valid? # $self->{pspages} .= "$x ux $y uy lineto stroke\n"; # XXXXX fixme return 1; }# }}} =item C Draws an arc on the circle of radius C with centre (C,C). The arc starts at angle C and finishes at C. Angles are specified in degrees, where 0 is at 3 o'clock, and the direction of travel is anti-clockwise. Any options are passed in a hash reference as the first parameter. The available option is: =over 4 =item filled => 1 If C is 1 then the arc will be filled in. =back Example: # semi-circle $p->arc(10, 10, 5, 0, 180); # complete filled circle $p->arc({filled=>1}, 30, 30, 10, 0, 360); =cut sub arc# {{{ { my $self = shift; my %opt = (); if (ref($_[0])) { %opt = %{; shift}; } if ((!$self->{pspagecount}) and (!$self->{eps})) { # Cannot draw on to non-page when not an eps file XXXXX return 0; } my ($x, $y, $r, $sa, $ea) = @_; unless (@_ == 5) { $self->_error("arc: wrong number of arguments"); return 0; } $self->newpath; $self->{pspages} .= "$x ux $y uy $r u $sa $ea arc "; if ($opt{'filled'}) { $self->{pspages} .= "fill\n" } else { $self->{pspages} .= "stroke\n" } return 1; }# }}} =item C The C method is multi-function, allowing many shapes to be created and manipulated. Polygon draws lines from (x1,y1) to (x2,y2) and then from (x2,y2) to (x3,y3) up to (xn-1,yn-1) to (xn,yn). Any options are passed in a hash reference as the first parameter. The available options are as follows: =over 4 =item rotate => angle =item rotate => [angle,x,y] Rotate the polygon by C degrees anti-clockwise. If x and y are specified then use the co-ordinate (x,y) as the centre of rotation, otherwise use the co-ordinate (x1,y1) from the main polygon. =item filled => 1 If C is 1 then the PostScript output is set to fill the object rather than just draw the lines. =item offset => [x,y] Displace the object by the vector (x,y). =back Example: # draw a square with lower left point at (10,10) $p->polygon(10,10, 10,20, 20,20, 20,10, 10,10); # draw a filled square with lower left point at (20,20) $p->polygon( {offset => [10,10], filled => 1}, 10,10, 10,20, 20,20, 20,10, 10,10); # draw a filled square with lower left point at (10,10) # rotated 45 degrees (about the point (10,10)) $p->polygon( {rotate => 45, filled => 1}, 10,10, 10,20, 20,20, 20,10, 10,10); =cut sub polygon# {{{ { my $self = shift; my %opt = (); my ($xoffset, $yoffset) = (0,0); my ($rotate, $rotatex, $rotatey) = (0,0,0); # PKENT comments - the first arg could be an optional hashref of options. See if # it's there with ref($_[0]) If it is, then shift it off and use those options. # Could take the form: polygon( { offset => [ 10, 10 ], filled => 0, rotate => # 45, rotate => [45, 10, 10] }, $x1, ... it seems neater to use perl native # structures instead of manipulating strings # ... done MCN 2002-10-22 if ($#_ < 3) { # cannot have polygon with just one point... $self->_error( "bad polygon - not enough points" ); return 0; } if (ref($_[0])) { %opt = %{; shift}; } my $x = shift; my $y = shift; if (defined $opt{'rotate'}) { if (ref($opt{'rotate'})) { ($rotate, $rotatex, $rotatey) = @{$opt{'rotate'}}; } else { ($rotate, $rotatex, $rotatey) = ($opt{'rotate'}, $x, $y); } } if (defined $opt{'offset'}) { if (ref($opt{'offset'})) { ($xoffset, $yoffset) = @{$opt{'offset'}}; } else { $self->_error("polygon: bad offset option" ); return 0; } } if (!defined $opt{'filled'}) { $opt{'filled'} = 0; } unless (defined($x) && defined($y)) { $self->_error("polygon: no start point"); return 0; } my $savestate = ($xoffset || $yoffset || $rotate) ? 1 : 0 ; if ( $savestate ) { $self->{pspages} .= "gsave "; } if ($xoffset || $yoffset) { $self->{pspages} .= "$xoffset ux $yoffset uy translate\n"; #$self->{pspages} .= "$xoffset u $yoffset u translate\n"; ? } if ($rotate) { if (!$self->{usedrotabout}) { $self->{psfunctions} .= "/rotabout {3 copy pop translate rotate exch 0 exch sub exch 0 exch sub translate} def\n"; $self->{usedrotabout} = 1; } $self->{pspages} .= "$rotatex ux $rotatey uy $rotate rotabout\n"; # $self->{pspages} .= "gsave $rotatex ux $rotatey uy translate "; # $self->{pspages} .= "$rotate rotate -$rotatex ux -$rotatey uy translate\n"; } $self->newpath; $self->moveto($x, $y); while ($#_ > 0) { my $x = shift; my $y = shift; $self->{pspages} .= "$x ux $y uy lineto "; } if ($opt{'filled'}) { $self->{pspages} .= "fill\n"; } else { $self->{pspages} .= "stroke\n"; } if ( $savestate ) { $self->{pspages} .= "grestore\n"; } return 1; }# }}} =item C Plot a circle with centre at (x,y) and radius of r. There is only one option. =over 4 =item filled => 1 If C is 1 then the PostScript output is set to fill the object rather than just draw the lines. =back Example: $p->circle(40,40, 20); $p->circle( {filled => 1}, 62,31, 15); =cut sub circle# {{{ { my $self = shift; my %opt = (); if (ref($_[0])) { %opt = %{; shift}; } my ($x, $y, $r) = @_; unless (@_ == 3) { $self->_error("circle: wrong number of arguments"); return 0; } if (!$self->{usedcircle}) { $self->{psfunctions} .= "/circle {newpath 0 360 arc closepath} bind def\n"; $self->{usedcircle} = 1; } $self->{pspages} .= "$x ux $y uy $r u circle "; if ($opt{'filled'}) { $self->{pspages} .= "fill\n" } else {$self->{pspages} .= "stroke\n" } return 1; }# }}} =item C Draw text in an arc centered about angle C with circle midpoint (C,C) and radius C. There is only one option. =over 4 =item align => "alignment" C can be 'inside' or 'outside'. The default is 'inside'. =back Example: # outside the radius, centered at 90 degrees from the origin $p->circletext(40, 40, 20, 90, "Hello, Outside World!"); # inside the radius centered at 270 degrees from the origin $p->circletext( {align => "inside"}, 40, 40, 20, 270, "Hello, Inside World!"); =cut sub circletext# {{{ { my $self = shift; my %opt = (); if (ref($_[0])) { %opt = %{; shift}; } my ($x, $y, $r, $a, $text) = @_; unless (@_ == 5) { $self->_error("circletext: wrong number of arguments"); return 0; } unless (defined $self->{lastfontsize}) { $self->_error("circletext: must set font first"); return 0; } if (!$self->{usedcircletext}) { $self->{psfunctions} .= <<'EOCT'; /outsidecircletext { $circtextdict begin /radius exch def /centerangle exch def /ptsize exch def /str exch def /xradius radius ptsize 4 div add def gsave centerangle str findhalfangle add rotate str { /charcode exch def ( ) dup 0 charcode put outsideshowcharandrotate } forall grestore end } def /insidecircletext { $circtextdict begin /radius exch def /centerangle exch def /ptsize exch def /str exch def /xradius radius ptsize 3 div sub def gsave centerangle str findhalfangle sub rotate str { /charcode exch def ( ) dup 0 charcode put insideshowcharandrotate } forall grestore end } def /$circtextdict 16 dict def $circtextdict begin /findhalfangle { stringwidth pop 2 div 2 xradius mul pi mul div 360 mul } def /outsideshowcharandrotate { /char exch def /halfangle char findhalfangle def gsave halfangle neg rotate radius 0 translate -90 rotate char stringwidth pop 2 div neg 0 moveto char show grestore halfangle 2 mul neg rotate } def /insideshowcharandrotate { /char exch def /halfangle char findhalfangle def gsave halfangle rotate radius 0 translate 90 rotate char stringwidth pop 2 div neg 0 moveto char show grestore halfangle 2 mul rotate } def /pi 3.1415926 def end EOCT $self->{usedcircletext} = 1; } $self->{pspages} .= "gsave\n"; $self->{pspages} .= " $x ux $y uy translate\n"; $self->{pspages} .= " ($text) $self->{lastfontsize} $a $r u "; if ($opt{'align'} && ($opt{'align'} eq "outside")) { $self->{pspages} .= "outsidecircletext\n"; } else { $self->{pspages} .= "insidecircletext\n"; } $self->{pspages} .= "grestore\n"; return 1; }# }}} =item C Draw a rectangle from lower left co-ordinates (x1,y1) to upper right co-ordinates (y1,y2). Options are: =over 4 =item filled => 1 If C is 1 then fill the rectangle. =back Example: $p->box(10,10, 20,30); $p->box( {filled => 1}, 10,10, 20,30); Notes The C method is far more flexible, but this method is quicker! =cut sub box# {{{ { my $self = shift; my %opt = (); if (ref($_[0])) { %opt = %{; shift}; } my ($x1, $y1, $x2, $y2) = @_; unless (@_ == 4) { $self->_error("box: wrong number of arguments"); return 0; } if (!defined($opt{'filled'})) { $opt{'filled'} = 0; } unless ($self->{usedbox}) { $self->{psfunctions} .= "/box { newpath 3 copy pop exch 4 copy pop pop 8 copy pop pop pop pop exch pop exch 3 copy pop pop exch moveto lineto lineto lineto pop pop pop pop closepath } bind def "; $self->{usedbox} = 1; } $self->{pspages} .= "$x1 ux $y1 uy $x2 ux $y2 uy box "; if ($opt{'filled'}) { $self->{pspages} .= "fill\n" } else {$self->{pspages} .= "stroke\n" } return 1; }# }}} =item C Set the current font to the PostScript font C. Set the size in PostScript points to C. Notes This method must be called on every page before the C method is used. =cut sub setfont# {{{ { my $self = shift; my ($name, $size, $ysize) = @_; unless (@_ == 2) { $self->_error( "wrong number of arguments for setfont" ); return 0; } # set font y size XXXXX $self->{pspages} .= "/$name findfont $size scalefont setfont\n"; $self->{lastfontsize} = $size; return 1; }# }}} =item C Plot text on the current page with the lower left co-ordinates at (x,y) and using the current font. The text is specified in C. Options are: =over 4 =item align => "alignment" alignment can be 'left', 'centre' or 'right'. The default is 'left'. =item rotate => angle "rotate" degrees of rotation, defaults to 0 (i.e. no rotation). The angle to rotate the text, in degrees. Centres about (x,y) and rotates clockwise. (?). Default 0 degrees. =back Example: $p->setfont("Times-Roman", 12); $p->text(40,40, "The frog sat on the leaf in the pond."); $p->text( {align => 'centre'}, 140,40, "This is centered."); $p->text( {rotate => 90}, 140,40, "This is rotated."); $p->text( {rotate => 90, align => 'centre'}, 140,40, "This is both."); =cut sub text# {{{ { my $self = shift; my $rot = ""; my $rot_m = ""; my $align = ""; my %opt = (); if (ref($_[0])) { %opt = %{; shift}; } unless ( @_ == 3 ) { # check required params first $self->_error("text: wrong number of arguments"); return 0; } my ($x, $y, $text) = @_; unless (defined($x) && defined($y) && defined($text)) { $self->_error("text: wrong number of arguments"); return 0; } # Escape text to allow parentheses $text =~ s|([\\])|\\$1|g; $text =~ s/([\x00-\x1f\x7f-\xff])/sprintf('\\%03o',ord($1))/ge; $self->newpath; $self->moveto($x, $y); # rotation if (defined $opt{'rotate'}) { my $rot_a = $opt{ 'rotate' }; if( $rot_a != 0 ) { $rot = " $rot_a rotate "; $rot_a = -$rot_a; $rot_m = " $rot_a rotate "; }; } # alignment $align = " show stroke"; # align left if (defined $opt{'align'}) { $align = " dup stringwidth pop neg 0 rmoveto show" if $opt{ 'align' } eq 'right'; $align = " dup stringwidth pop 2 div neg 0 rmoveto show" if $opt{ 'align' } eq 'center' or $opt{ 'align' } eq 'centre'; } $self->{pspages} .= "($text) $rot $align $rot_m\n"; return 1; }# }}} =item curve( x1, y1, x2, y2, x3, y3, x4, y4 ) Create a curve from (x1, y1) to (x4, y4). (x2, y2) and (x3, y3) are the control points for the start- and end-points respectively. =cut sub curve# {{{ { my $self = shift; my ($x1, $y1, $x2, $y2, $x3, $y3, $x4, $y4) = @_; # dashed lines? XXXXX unless ( @_ == 8 ) { $self->_error( "bad curve definition, wrong number of args" ); return 0; } if ((!$self->{pspagecount}) and (!$self->{eps})) { # Cannot draw on to non-page when not an eps file XXXXX return 0; } $self->newpath; $self->moveto($x1, $y1); $self->{pspages} .= "$x2 ux $y2 uy $x3 ux $y3 uy $x4 ux $y4 uy curveto stroke\n"; return 1; }# }}} =item curvextend( x1, y1, x2, y2, x3, y3 ) Assuming the previous command was C, C, C or C, extend that path with another curve segment to the co-ordinates (x3, y3). (x1, y1) and (x2, y2) are the control points. Behaviour after any other method is unspecified. =cut sub curvextend# {{{ { my $self = shift; my ($x1, $y1, $x2, $y2, $x3, $y3) = @_; unless ( @_ == 6 ) { $self->_error( "bad curvextend definition, wrong number of args" ); return 0; } # curveto may follow a lineto etc... $self->{pspages} =~ s/eto stroke\n$/eto\n$x1 ux $y1 uy $x2 ux $y2 uy $x3 ux $y3 uy curveto stroke\n/; return 1; }# }}} =item newpath This method is used internally to begin a new drawing path - you should generally NEVER use it. =cut sub newpath# {{{ { my $self = shift; $self->{pspages} .= "newpath\n"; return 1; }# }}} =item moveto( x, y ) This method is used internally to move the cursor to a new point at (x, y) - you will generally NEVER use this method. =cut sub moveto# {{{ { my $self = shift; my ($x, $y) = @_; $self->{pspages} .= "$x ux $y uy moveto\n"; return 1; }# }}} =item C Imports an EPS file and scales/translates its bounding box to fill the area defined by lower left co-ordinates (x1,y1) and upper right co-ordinates (x2,y2). By default, if the co-ordinates have a different aspect ratio from the bounding box, the scaling is constrained on the greater dimension to keep the EPS fully inside the area. Options are: =over 4 =item overlap => 1 If C is 1 then the scaling is calculated on the lesser dimension and the EPS can overlap the area. =item stretch => 1 If C is 1 then fill the entire area, ignoring the aspect ratio. This option overrides C if both are given. =back Example: # Assume smiley.eps is a round smiley face in a square bounding box # Scale it to a (10,10)(20,20) box $p->importepsfile("smiley.eps", 10,10, 20,20); # Keeps aspect ratio, constrained to smallest fit $p->importepsfile("smiley.eps", 10,10, 30,20); # Keeps aspect ratio, allowed to overlap for largest fit $p->importepsfile( {overlap => 1}, "smiley.eps", 10,10, 30,20); # Aspect ratio is changed to give exact fit $p->importepsfile( {stretch => 1}, "smiley.eps", 10,10, 30,20); =cut sub importepsfile# {{{ { my $self = shift; my $bbllx; my $bblly; my $bburx; my $bbury; my $bbw; my $bbh; my $pagew; my $pageh; my $scalex; my $scaley; my $line; my $eps; my %opt = (); if (ref($_[0])) { %opt = %{; shift}; } my ($file, $x1, $y1, $x2, $y2) = @_; unless (@_ == 5) { $self->_error("importepsfile: wrong number of arguments"); return 0; } $opt{'overlap'} = 0 if (!defined($opt{'overlap'})); $opt{'stretch'} = 0 if (!defined($opt{'stretch'})); $eps = new PostScript::Simple::EPS(file => $file); ($bbllx, $bblly, $bburx, $bbury) = $eps->get_bbox(); $pagew = $x2 - $x1; $pageh = $y2 - $y1; $bbw = $bburx - $bbllx; $bbh = $bbury - $bblly; if (($bbw == 0) || ($bbh == 0)) { $self->_error("importeps: Bounding Box has zero dimension"); return 0; } $scalex = $pagew / $bbw; $scaley = $pageh / $bbh; if ($opt{'stretch'} == 0) { if ($opt{'overlap'} == 0) { if ($scalex > $scaley) { $scalex = $scaley; } else { $scaley = $scalex; } } else { if ($scalex > $scaley) { $scaley = $scalex; } else { $scalex = $scaley; } } } $eps->scale($scalex, $scaley); $eps->translate(-$bbllx, -$bblly); $self->_add_eps($eps, $x1, $y1); return 1; }# }}} =item C Imports a PostScript::Simple::EPS object into the current document at position C<(x,y)>. Example: use PostScript::Simple; # create a new PostScript object $p = new PostScript::Simple(papersize => "A4", colour => 1, units => "in"); # create a new page $p->newpage; # create an eps object $e = new PostScript::Simple::EPS(file => "test.eps"); $e->rotate(90); $e->scale(0.5); # add eps to the current page $p->importeps($e, 10,50); =cut sub importeps# {{{ { my $self = shift; my ($epsobj, $xpos, $ypos) = @_; unless (@_ == 3) { $self->_error("importeps: wrong number of arguments"); return 0; } $self->_add_eps($epsobj, $xpos, $ypos); return 1; }# }}} sub _add_eps# {{{ { my $self = shift; my $epsobj; my $xpos; my $ypos; if (ref($_[0]) ne "PostScript::Simple::EPS") { croak "internal error: _add_eps[0] must be eps object"; } if ((!$self->{pspagecount}) and (!$self->{eps})) { # Cannot draw on to non-page when not an eps file $self->_error("importeps: no current page"); return 0; } if ( @_ != 3 ) { croak "internal error: wrong number of arguments for _add_eps"; return 0; } unless ($self->{usedimporteps}) { $self->{psfunctions} .= <<'EOEPS'; /BeginEPSF { /b4_Inc_state save def /dict_count countdictstack def /op_count count 1 sub def userdict begin /showpage { } def 0 setgray 0 setlinecap 1 setlinewidth 0 setlinejoin 10 setmiterlimit [ ] 0 setdash newpath /languagelevel where { pop languagelevel 1 ne { false setstrokeadjust false setoverprint } if } if } bind def /EndEPSF { count op_count sub {pop} repeat countdictstack dict_count sub {end} repeat b4_Inc_state restore } bind def EOEPS $self->{usedimporteps} = 1; } ($epsobj, $xpos, $ypos) = @_; $self->{pspages} .= "BeginEPSF\n"; $self->{pspages} .= "$xpos ux $ypos uy translate\n"; $self->{pspages} .= "1 ux 1 uy scale\n"; $self->{pspages} .= $epsobj->_get_include_data($xpos, $ypos); $self->{pspages} .= "EndEPSF\n"; return 1; }# }}} ### PRIVATE sub _error {# {{{ my $self = shift; my $msg = shift; $self->{pspages} .= "(error: $msg\n) print flush\n"; }# }}} # Display method for debugging internal variables # #sub display { # my $self = shift; # my $i; # # foreach $i (keys(%{$self})) # { # print "$i = $self->{$i}\n"; # } #} =back =head1 BUGS Some current functionality may not be as expected, and/or may not work correctly. That's the fun with using code in development! =head1 AUTHOR The PostScript::Simple module was created by Matthew Newton, with ideas and suggestions from Mark Withall and many other people from around the world. Thanks! Please see the README file in the distribution for more information about contributors. Copyright (C) 2002-2003 Matthew C. Newton / Newton Computing This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details, available at http://www.gnu.org/licenses/gpl.html. =head1 SEE ALSO L =cut # vim:foldmethod=marker: package PostScript::Simple::EPS; use Exporter; use Carp; use vars qw($VERSION @ISA @EXPORT); @ISA = qw(Exporter); @EXPORT = qw(); $VERSION = "0.01"; =head1 NAME PostScript::Simple::EPS - EPS support for PostScript::Simple =head1 SYNOPSIS use PostScript::Simple; # create a new PostScript object $p = new PostScript::Simple(papersize => "A4", colour => 1, units => "in"); # create a new page $p->newpage; # add an eps file $p->add_eps({xsize => 3}, "test.eps", 1,1); $p->add_eps({yscale => 1.1, xscale => 1.8}, "test.eps", 4,8); # create an eps object $e = new PostScript::Simple::EPS(file => "test.eps"); $e->rotate(90); $e->xscale(0.5); $p->add_eps($e, 3, 3); # add eps object to postscript object $e->xscale(2); $p->add_eps($e, 2, 5); # add eps object to postscript object again # write the output to a file $p->output("file.ps"); =head1 DESCRIPTION PostScript::Simple::EPS allows you to add EPS files into PostScript::Simple objects. Included EPS files can be scaled and rotated, and placed anywhere inside a PostScript::Simple page. Remember when using translate/scale/rotate that you will normally need to do the operations in the reverse order to that which you expect. =head1 PREREQUISITES This module requires C, C, C and C. =head2 EXPORT None. =cut =head1 CONSTRUCTOR =over 4 =item C Create a new PostScript::Simple::EPS object. The options that can be set are: =over 4 =item file EPS file to be included. This or C must exist when the C method is called. =item source PostScript code for the EPS document. Either this or C must be set when C is called. =item clip Set to 0 to disable clipping to the EPS bounding box. Default is to clip. =back Example: $ps = new PostScript::Simple(landscape => 1, eps => 0, xsize => 4, ysize => 3, units => "in"); $eps = new PostScript::Simple::EPS(file => "test.eps"); $eps->scale(0.5); Scale the EPS file by x0.5 in both directions. $ps->newpage(); $ps->importeps($eps, 1, 1); Add the EPS file to the PostScript document at coords (1,1). $ps->importepsfile("another.eps", 1, 2, 4, 4); Easily add an EPS file to the PostScript document using bounding box (1,2),(4,4). The methods C and C are described in the documentation of C. =back =cut sub new# {{{ { my ($class, %data) = @_; my $self = { file => undef, # filename of the eps file xsize => undef, ysize => undef, units => "bp", # measuring units (see below) clip => 1, # clip to the bounding box bbx1 => 0, # Bounding Box definitions bby1 => 0, bbx2 => 0, bby2 => 0, epsprefix => [], epsfile => undef, epspostfix => [], }; foreach (keys %data) { $self->{$_} = $data{$_}; } if ((!defined $self->{"file"}) && (!defined $self->{"source"})) { croak "must provide file or source"; } if ((defined $self->{"file"}) && (defined $self->{"source"})) { croak "cannot provide both file and source"; } bless $self, $class; $self->init(); return $self; }# }}} sub _getfilebbox# {{{ { my $self = shift; my $foundbbx = 0; return 0 if (!defined $$self{file}); open EPS, "< $$self{file}" || croak "can't open eps file $$self{file}"; SCAN: while () { s/[\r\n]*$//; #ultimate chomp if (/^\%\%BoundingBox:\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)\s*$/) { $$self{bbx1} = $1; $$self{bby1} = $2; $$self{bbx2} = $3; $$self{bby2} = $4; $foundbbx = 1; last SCAN; } } close EPS; return $foundbbx; }# }}} sub _getsourcebbox# {{{ { my $self = shift; return 0 if (!defined $$self{epsfile}); if ($$self{epsfile} =~ /^\%\%BoundingBox:\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)$/m) { $$self{bbx1} = $1; $$self{bby1} = $2; $$self{bbx2} = $3; $$self{bby2} = $4; return 1; } return 0; }# }}} sub init# {{{ { my $self = shift; my $foundbbx = 0; if (defined($$self{source})) { # with dynamic generated file, what do we do with {Begin,End}Document? # $$self{"epsfile"} = "\%\%BeginDocument: $$self{file}\n"; # $$self{"epsfile"} .= "\%\%EndDocument\n"; $$self{"epsfile"} = $$self{"source"}; delete $$self{"source"}; croak "EPS file must contain a BoundingBox" if (!$self->_getsourcebbox()); } else { croak "EPS file must contain a BoundingBox" if (!_getfilebbox($self)); } if (($$self{bbx2} - $$self{bbx1} == 0) || ($$self{bby2} - $$self{bby1} == 0)) { $self->_error("PostScript::Simple::EPS: Bounding Box has zero dimension"); return 0; } $self->reset(); return 1; }# }}} =head1 OBJECT METHODS All object methods return 1 for success or 0 in some error condition (e.g. insufficient arguments). Error message text is also drawn on the page. =over 4 =item C Returns the EPS bounding box, as specified on the %%BoundingBox line of the EPS file. Units are standard PostScript points. Example: ($x1, $y1, $x2, $y2) = $eps->get_bbox(); =cut sub get_bbox# {{{ { my $self = shift; return ($$self{bbx1}, $$self{bby1}, $$self{bbx2}, $$self{bby2}); }# }}} =item C Returns the EPS width. Example: print "EPS width is " . abs($eps->width()) . "\n"; =cut sub width# {{{ { my $self = shift; return ($$self{bbx2} - $$self{bbx1}); }# }}} =item C Returns the EPS height. Example: To scale $eps to 72 points high, do: $eps->scale(1, 72/$eps->height()); =cut sub height# {{{ { my $self = shift; return ($$self{bby2} - $$self{bby1}); }# }}} =item C Scales the EPS file. To scale in one direction only, specify 1 as the other scale. To scale the EPS file the same in both directions, you may use the shortcut of just specifying the one value. Example: $eps->scale(1.2, 0.8); # make wider and shorter $eps->scale(0.5); # shrink to half size =cut sub scale# {{{ { my $self = shift; my ($x, $y) = @_; $y = $x if (!defined $y); croak "bad arguments to scale" if (!defined $x); push @{$$self{epsprefix}}, "$x $y scale"; return 1; }# }}} =item C Rotates the EPS file by C degrees anti-clockwise. The EPS file is rotated about it's own origin (as defined by it's bounding box). To rotate by a particular co-ordinate (again, relative to the EPS file, not the main PostScript document), use translate, too. Example: $eps->rotate(180); # turn upside-down To rotate 30 degrees about point (50,50): $eps->translate(50, 50); $eps->rotate(30); $eps->translate(-50, -50); =cut sub rotate# {{{ { my $self = shift; my ($d) = @_; croak "bad arguments to rotate" if (!defined $d); push @{$$self{epsprefix}}, "$d rotate"; return 1; }# }}} =item C Move the EPS file by C,C PostScript points. Example: $eps->translate(10, 10); # move 10 points in both directions =cut sub translate# {{{ { my $self = shift; my ($x, $y) = @_; croak "bad arguments to translate" if (!defined $y); push @{$$self{epsprefix}}, "$x $y translate"; return 1; }# }}} =item C Clear all translate, rotate and scale operations. Example: $eps->reset(); =cut sub reset# {{{ { my $self = shift; @{$$self{"epsprefix"}} = (); return 1; }# }}} =item C Reads the EPS file into memory, to save reading it from file each time if inserted many times into a document. Can not be used with C. =cut sub load# {{{ { my $self = shift; local *EPS; return 1 if (defined $$self{"epsfile"}); $$self{"epsfile"} = "\%\%BeginDocument: $$self{file}\n"; open EPS, "< $$self{file}" || croak "can't open eps file $$self{file}"; while () { $$self{"epsfile"} .= $_; } close EPS; $$self{"epsfile"} .= "\%\%EndDocument\n"; return 1; }# }}} =item C Experimental: defines the EPS at in the document prolog, and just runs a command to insert it each time it is used. C