#! /bin/awk -f ###################################################################### # # pdb2cif # # Simple awk script to filter a PDB file and produce a CIF file. # # Phil Bourne (bourne@sdsc.edu) # # adapted to 6 Oct 95 cifdic.m95 0.7.28 # by # Herbert J. Bernstein # Bernstein+Sons, P.O. Box 177, Bellport, NY 11713 # phone/fax: 1-516-286-1999, email: yaya@aip.org # # Portions of the adaptation work were done with the cooperation of # the Chemistry Department, Brookhaven National Laboratory # #*************** WARNING ****************** WARNING ********************* # THIS IS AN ALPHA TEST VERSION OF CODE UNDER DEVELOPMENT. # ******* USE WITH GREAT CAUTION ******* # COMMENTS AND SUGGESTIONS APPRECIATED # If you like the basic approach, thank Phil Bourne. He did # the real work of creating pdb2cif. If you have problems with # the adaptation to cifdic.m95, tell yaya@aip.org # # Current major limitations: # # When PDB entries have a blank chain identifier, the current version # of the code assigns "." for the corresponding _struct_asym.id. This # is not a desirable choice. Better approaches are under discussion. # Comments appreciated. # # This version of pdb2cif will not run with all versions of awk, because # of the use of functions and a call to system. When the code stabilizes, # versions for older awks will be made from this one. Until then, if # you have trouble with /bin/awk, we would suggest using the gnu version # of awk, gawk. 
# ########################################################################### #************************************************************************ # # # This version available via http from: # http://www.chemistry.bnl.gov/~yaya/software/pdb2cif # # On most unix systems, you can make this into an executable program # by executing the command # # chmod 755 pdb2cif # # in which case the "awk -f pdb2cif" part of the following instructions # can be changed to "pdb2cif" # # # NOTE: On some systems, you may need to use "gawk" instead of "awk". This # program uses features which are _not_ found in the original Aho, Kernighan, # Weinberger, "Awk - a pattern scanning and processing language," but which # have since been added on most systems: functions and the call to "system" # if the use of function or system generates a syntax error, you may wish to # obtain the gnu version of awk, "gawk", to be able to run pdb2cif. The other # system dependency you may have is in the use of a system call to "date". Some # systems do not support the 4-digit year format code %Y, and others do not # support format codes at all. In the first case, you can change the %Y to # 19%y (just remember to fix this in the year 2000), but in the second case, # you should just comment out the offending call. The call is marked with # a WARNING comment later in this script. # # To run with a single pdb file: # awk -f this_file pdb_file > cif_file # eg awk -f pdb2cif 1nxb.pdb > 1nxb.cif # # To run with a single pdb file and get verbose output: # awk -f this_file verbose=yes pdb_file > cif_file # eg awk -f pdb2cif verbose=yes 1nxb.pdb > 1nxb.cif # # To run with a directory of pdb files such that *.pdb -> *.cif: # foreach i (*.pdb) # set head = ($i:r) # awk -f this_file [verbose=yes] $i > $head.cif # end # # Version History: # # 1.0.9-alpha 2 Nov 95 -- H. J. Bernstein # # Adopt cell.Z_PDB instead of cell.formula_units_Z # # 1.0.8-alpha 30 Oct 95 -- H. J. 
Bernstein # # Fix volume calculation # # 1.0.7-alpha 29 Oct 95 -- H. J. Bernstein # # 1. Add clear warnings about limitations # # 2. Fix _atom_site.entity_seq_num information for the # case of disordered residues presented non-interleaved, # as in 5hvp # # 3. Start conversion to m4 for old awk versions # # 4. Add conditional code to put entity_seq_num info # into all atom sites where known, provided dense_list # is not "yes" # # 1.0.6-alpha 26 Oct 95 -- H. J. Bernstein # # Sequence matching # # 1.0.5-alpha 10 Oct 95 -- H. J. Bernstein # # 1. Rework of formul and SEQRES # # 2. Make "covale" " . " for bonds, since # PDB does not distinguish bond orders any more # # 3. Fix gawk'ism in SHEET # # 1.0.4-alpha 19 Oct 95 -- H. J. Bernstein # # 1. Fix extraneous output from REVDAT # # 2. Add warning comments about gawk and date # # 3. Fix handling of saltbr and exptl.method # # 1.0.3-alpha 13 Oct 95 -- H. J. Bernstein # # 1. Revise sheet handling to match current dictionary # # 2. Convert ORIGX to cifdic.m95 0.7.28 (6 Oct 95) definition # # 1.0.2-alpha 6 Oct 95 -- H. J. Bernstein # # Many more changes of item.id's and added first pass # at typesetting codes, controlled by convtext="yes" # # 1.0.1-alpha 29 Sep 95, fix Phil's email address. -- HJB # # 1.0-alpha 28 Sep 95, base version of mmCIF cifdic.m95 version # in a state ready for testing. Makes some assumptions about # handling of ORIGX, MTRIX and TER which are not settled in # mmCIF, and may need to be changed. There are still several # PDB record types to be translated, and the ordering of the # output needs to be improved. # Herbert J. Bernstein, Bernstein+Sons # yaya@aip.org # # 1. Reorganized SCALE, MTRIX, ORIGX to common # format. # # 2. Added handling of KEYWRD, OBSLTE, SPRSDE # # 3. Reorganized atom site list and included TER # # 4. 
Reorganized FTNOTE output and moved down in output # # 0.9c-mm-alpha 23 Sep 92, corrected handling of formulas and # id's of HETs # # 0.9c-mm-pre-alpha 22 Sep 1995 Based on mmCIF dictionary v0.7.24 # 31 Aug 95, cifdic.m95, 22 September 1995, HJB, B+S # # 1. Automatically converted all official aliases in # version 0.9c, including most in comments # to standard mmCIF names # # 2. Manually converted most of the rest # # 3. Revised handling of ANISOU to use the # necessary atom_site category items with # lines embedded in the loops if ANISOU cards # are present. # # 4. Revised REVDAT handling to print at the # end of processing, using _database_pdb_rev # and _database_pdb_rev_record loops # # 5. Changed the atom types to the first 2 characters # of the PDB atom name # # 6. Squeezed out the blanks from the middle of PDB # atom names to remove the need for quote marks # # 7. Changed " " default chain indicators to "." instead # of "A", since "A" could cause mis-assignments # # Did a lot more and a lot remains to be done, but this # should be enough to allow for meaningful testing. The # major planned reorganizations are to add the remaining # PDB card types and to present the output in alphabetized # sections. # # Prior Version History: # # 0.9c Nov 1993 Based on mmCIF dictionary v0.5, 01-sep-1993 # # 1. Added _data block designator based on filename # # 2. Quoted space group ie 'P21 21 21' # # 3. All references to _atom_id are now in quotes to avoid # N A ie atom names in HETS containing white space. # # 4. MTRIX fixed - field spec. wrong in Guide to Author # wrong number of items - fixed # # 5. H-M should be H_M in space group name - fixed. # # 6. Removed some of the keyword dependencies e.g. # will now process list of ATOM only records # # 7. Handling of book citations in JRNL and REMARK 1 fixed. 
# # Known problems not fixed: # Determining that two strands or chains are chemically identical and # therefore the same entity even though they have different _asymm_id's # # # # # 0.9b Sept 1993 Based on mmCIF dictionary v0.5, 01-sep-1993 # # 1. Reworked the handling of _entities # # 2. Added the verbose option.. this is useful if you intend # using the converted file as a template for bigger and # better things # # 3. Corrected a number of loop_ problems # # 4. Added new data items to the following: # _diffrn_* # _struct_asym_* # _entity_reference_* # _entity_nonp_* # _entity_mon_* # 5. Corrected _struct_conf_id # # 0.9a Sept 1993 Based on mmCIF dictionary v0.5, 01-sep-1993 # 1. Much faster by storing not only incremental atom info. # but also PDB atom numbering info for use by CONECT records # # 2. Added _atom_sites_alt_id # _atom_sites_alt_details # # 3. Added _struct_conn_par1_label_alt_id and # _struct_conn_par2_label_alt_id # # 4. Added _struct_site_gen_label_alt_id # # 5. Added _database_PDB_rev_replaces # # # 0.9 August 1993 Based on mmCIF dictionary v0.5, 11-aug-1993 # # Comments: # # _entity_nonp_* is derived from the FORMUL records.. this is not # adequate since entries with non-polymer entities dont always # have these records. # # END records are ignored. # # ENDMDL records are ignored. # # EXPDTA added a data item _exptl_method not in mmCIF v0.5 # # JRNL/REMARK1 added _citation_journal_codon_pdb # # MTRIX Currently using the non-mmCIF definitions.. # _struct_asym_matrix_id # _struct_asym_matrix_11 .. _33 # _struct_asym_vector_1 .. _3 # These will need to be revisited. # # MODEL not supported. The suggested approach has been to use a # separate data block fo reach structure with a shared global header. # This is not currently implemented. # # REMARK records currently do not have a data item # # OBSLTE records are ignored -- the full release of the July 1993 PDB # did not contain any. # # ORIGX records ignored. 
#
# SCALE records add 3 vectors to the orthogonal --> fractional
# conversion not found in mmCIF. They are added here:
#     atom_sites_fract_tran_vector_11
#     atom_sites_fract_tran_vector_22
#     atom_sites_fract_tran_vector_33
#
#
# SPRSDE records ignored at this time while awaiting suitable data items.
#
# SIGATM and SIGUIJ records are ignored at this time. This will need to be
# addressed. Right now only 1 entry was found containing SIGATM entries
# (4rxn) and none with SIGUIJ.
#
# TVECT records ignored.
#
# Additional data items for categories like _struct_topol will need
# to be added as they evolve.
#
#######################################################################

#
# BEGIN: print the conversion banner and initialise every counter, flag
# and lookup table used by the per-record rules that follow.
#
BEGIN {

    version = "1.0.9-alpha"
    version_date = " 2 Nov 95"

    printf ("\n")
    printf ("###############################################\n")
    printf ("# #\n")
    printf ("# Converted from PDB format to CIF format by #\n")
    printf ("# pdb2cif version %-15s %9s #\n",version,version_date)
    printf ("# by Phil Bourne and Herbert J. Bernstein #\n")

    # Pre-release versions carry an extra warning box.  The test is
    # case-insensitive for both "alpha" and "beta".
    if (tolower(version) ~ /alpha|beta/) {
        printf ("# ************* WARNING **************** #\n")
        printf ("# * * #\n")
        printf ("# * THIS IS A TEST VERSION USE WITH CAUTION * #\n")
        printf ("# * * #\n")
        printf ("# ******************************************* #\n")
    }
    printf ("# Please report problems to yaya@aip.org #\n")
    printf ("# #\n")
    printf ("###############################################\n")
    printf (( \
"#\n" \
"# Current major limitations:\n" \
"#\n" \
"# When PDB entries have a blank chain identifier, the current version\n" \
"# of the code assigns \".\" for the corresponding _struct_asym.id. This\n" \
"# is not a desirable choice. Better approaches are under discussion.\n" \
"# Comments appreciated.\n" \
"#\n" \
"# This version of pdb2cif will not run with all versions of awk, because\n" \
"# of the use of functions and a call to system. When the code stabilizes,\n" \
"# versions for older awks will be made from this one. Until then, if\n" \
"# you have trouble with /bin/awk, we would suggest using the gnu version\n" \
"# of awk, gawk.\n" \
"#\n" \
"###########################################################################\n" \
    ))

    #
    # Set starting variables
    #
    # The following flag is used to produce a more complete CIF entry,
    # i.e. data items are given, but with the value "?".
    # If you desire only the minimum set of data items comment out the
    # following one line:
    # NOTE(review): the three option assignments below are kept commented
    # out, which matches the documented "verbose=yes" command-line usage;
    # confirm that none of them was meant to be active by default.
    # verbose = "yes"
    #
    # The following flag controls conversion of text fields using
    # the type-setting codes used in some PDB entries
    # convtext = "yes"
    #
    # The following flag control the distribution of entity_seq_num
    # to all atom site lines, uncomment if you do _not_ want
    # this distribution done, but want denser atom lists
    # dense_list = "yes"

    aniso_flag = 0
    atom_alt_flag = 1
    atom_flag = 1
    atom_flag_1 = 1
    atom_flag_2 = 1
    audit_flag = 0
    author_flag = 1
    cit_flag = 0                 # 1 is the primary citation
    compnd_flag = 1
    conect_flag = 1
    conect_flag_2 = 0
    conect_id = 1
    entity_flag = 0
    entity_mon_flag = 1

    # Heads of the linked lists of non-polymer / polymer entities; the
    # key " " is the list head and "" marks the end of a list.
    ent_non_poly_point[" "] = ""
    ent_non_poly_num[" "] = 0
    ent_poly_point[" "] = ""
    ent_poly_num[" "] = 0
    entity_seq_num_flag = 0

    for (x = 1; x <= 999; ++x) {
        ftnote_flag[x] = 0
    }
    ftnote_flag_old = 1
    foot_flag = 0
    formul_flag = 1
    helix_flag = 1
    het_flag = 1
    jrnl_flag = 1
    keywrd_flag = 0
    mon_flag = 1
    mtrix_flag = 0
    nonp_flag = 1
    num_non_poly_ents = 0
    num_poly_ents = 0
    num_res_name = 0
    num_res_pair = 0
    origx_flag = 0
    record_number = " "
    remark_flag = 0
    remark_header_flag = 0
    revdat_flag = 1
    scale_flag = 0
    seqres_flag = 1
    sheet_flag = 0
    sheet_flag_2 = 1
    site_flag = 1
    s_o_flag = 0
    ss_flag = 1                  # tracks HELIX, TURN and SHEET
    ss_flag_2 = 1
    ssbond_flag = 1
    ter_flag = 0
    turn_flag = 1
    turn_flag_2 = 1
    flag = 0                     # flags to correctly set ;
    previous_keyword = " "       # prior to new keyword
    remark_number = 0
    remark_number_old = 0
    all_remarks = 0

    #
    # set up conversion strings for residues
    #
    numl="0123456789"
    charl="abcdefghijklmnopqrstuvwxyz"
    charu="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    chars="+_*/!#$,.;:?|{}()"
    charx=(charl charu numl chars)

    #
    # Define date format conversion arrays
    #
    # mmm2mm[month_name] = month_ordinal
    # yyyy[2_digit_year] = 4_digit_year
    #
    mmm2mm["JAN"] = "01"
    mmm2mm["FEB"] = "02"
    mmm2mm["MAR"] = "03"
    mmm2mm["APR"] = "04"
    mmm2mm["MAY"] = "05"
    mmm2mm["JUN"] = "06"
    mmm2mm["JUL"] = "07"
    mmm2mm["AUG"] = "08"
    mmm2mm["SEP"] = "09"
    mmm2mm["OCT"] = "10"
    mmm2mm["NOV"] = "11"
    mmm2mm["DEC"] = "12"

    # Two-digit years 70..99 map to 1970..1999 and 00..69 map to
    # 2000..2069.  The century step is 100 years; adding 1000 here would
    # turn "05" into 2905.
    for (yy = 0; yy < 100; ++yy) {
        yyyy[yy+0] = yy + 1900
        if (yy < 70) yyyy[yy+0] += 100
    }

    # Define lists of amino acids and nucleic acids
    num_aa = split(("ABU ACD ALA ALB ALI ARG ARO ASN ASP ASX" \
                    " BAS CYS GLN GLU GLX GLY HIS HYP ILE LEU" \
                    " LYS MET PCA PHE PRO SER THR TRP TYR VAL"), \
                   aa_list, " ")
    num_na = split(("A +A C +C G +G I +I T +T U +U"),na_list, " ")

}   # End of BEGIN statement

#
# Determine whether this is a new keyword, if so and flag is set
# terminate free text with a ;
#
# Runs for every record longer than 5 characters.  previous_keyword is
# always updated to the current keyword, whether or not a terminator
# was printed.
#
length($0) > 5 {
    if ($1 != previous_keyword && flag != "0") {
        printf ("; \n\n")
        flag = 0
    }
    previous_keyword = $1
}

#=========================================================================
# Keyword ATOM or HETATM or TER
#
# atom pdb type         [ 1- 6] = _atom_site.group_PDB
# atom serial number
#                       [ 7-11] = _atom_site.id
# atom type             [13-14]
#  (first 2 characters of atom name)
#                               = _atom_site.type_symbol
# atom name             [13-16] = _atom_site.label_atom_id
# alternate location    [17]    = _atom_site.label_alt_id
# residue name          [18-20] = _atom_site.label_comp_id
# chain identifier      [22]    = _atom_site.label_asym_id
# residue sequence no.
#                       [23-26] = _atom_site.label_seq_id
# insertion code        [27]    = appended to residue sequence no.
#
# Information on non_standard monomers and non-polymers derived from
# HET and FORMUL records is presented here using additional information
# derived from ATOM and HETATM records.
# The assignment of non-standard monomers versus non-polymers
# is tricky and unlikely to be correct for all entries. Assignment is
# based on the following rules:
# i) If the HET has a chain id then it must be non-standard (this
#    is not complete since single chains do not have a chain id
#    assigned).
# ii) Any HETATM following a TER and with no subsequent ATOM records
#    is considered a non-polymer.
#
# Shortcomings:
# 1. special cases of atom type (e.g. OX, 1H) need to be handled
#
# The rule below stores one entry per ATOM, HETATM or TER record.
# A fixed-column field is treated as blank when it contains only
# spaces OR is empty; the latter happens when the record is shorter
# than the field's columns (typically an unpadded TER record).
#
$1 == "ATOM" || $1 == "HETATM" || $1 == "TER" {

    # Since atoms are not necessarily numbered consecutively maintain
    # a complete consecutive list 1 -> atom_flag and a partial
    # list for use by CONECT which references the atom_number
    # (the *_con arrays and atom_point are keyed by the PDB serial).
    atom_pdb = substr($0, 7, 5)
    atom_number[atom_flag] = atom_pdb
    atom_point[atom_pdb] = atom_flag

    # squeezname() is defined elsewhere in this file; the atom type is
    # the first two characters of whatever name it returns.
    temp_name = squeezname(substr($0, 13, 4))
    atom_name[atom_flag] = temp_name
    atom_type[atom_flag] = substr(temp_name, 1, 2)
    atom_con[atom_pdb] = temp_name

    atom_alt_location[atom_flag] = substr($0, 17, 1)
    atom_alt_con[atom_pdb] = substr($0, 17, 1)
    residue_name[atom_flag] = substr($0, 18, 3)
    residue_con[atom_pdb] = substr($0, 18, 3)
    chain_id[atom_flag] = substr($0, 22, 1)
    chain_con[atom_pdb] = substr($0, 22, 1)
    residue_seq_number[atom_flag] = substr($0, 23, 4)
    residue_insert_ind[atom_flag] = substr($0, 27, 1)
    residue_seq_con[atom_pdb] = substr($0, 23, 4)
    atom_x[atom_flag] = substr($0, 31, 8)
    atom_y[atom_flag] = substr($0, 39, 8)
    atom_z[atom_flag] = substr($0, 47, 8)
    atom_occ[atom_flag] = substr($0, 55, 6)
    B_or_U[atom_flag] = substr($0, 61, 6)
    footnote_number[atom_flag] = substr($0, 68, 3)

    #
    # replace blank or missing numeric fields by the "." placeholder
    #
    if (atom_x[atom_flag] ~ /^ *$/) atom_x[atom_flag] = " . "
    if (atom_y[atom_flag] ~ /^ *$/) atom_y[atom_flag] = " . "
    if (atom_z[atom_flag] ~ /^ *$/) atom_z[atom_flag] = " . "
    if (atom_occ[atom_flag] ~ /^ *$/) atom_occ[atom_flag] = " . "
    if (B_or_U[atom_flag] ~ /^ *$/) B_or_U[atom_flag] = " . "

    #
    # flag atom as ATOM or HETATM or TER
    # ($1 is one of exactly these three keywords inside this rule)
    #
    atom_or_het[atom_flag] = $1

    #
    # set alternate location value if blank
    #
    if (atom_alt_location[atom_flag] ~ /^ *$/) \
        atom_alt_location[atom_flag] = "."
    if (atom_alt_con[atom_pdb] ~ /^ *$/) \
        atom_alt_con[atom_pdb] = "."

    #
    # make a list of alternative atoms
    #
    if (atom_alt_location[atom_flag] != ".") {
        at_alt = atom_alt_location[atom_flag]
        atom_alt_list[at_alt]++
    }

    #
    # set footnote value if blank
    #
    if (footnote_number[atom_flag] ~ /^ *$/) \
        footnote_number[atom_flag] = " . "

    #
    # set chain_id to . for every record type if blank
    #
    if (chain_id[atom_flag] ~ /^ *$/) chain_id[atom_flag] = "."
    if (chain_con[atom_pdb] ~ /^ *$/) chain_con[atom_pdb] = "."

    #
    # set entity_id to chain_id for ATOM and TER records
    # (so a blank chain gives entity_id ".")
    #
    if ($1 == "ATOM" || $1 == "TER") {
        entity_id[atom_flag] = chain_id[atom_flag]
    }

    #
    # set _entity.id to residue_name for HETATM records
    #
    if ($1 == "HETATM") {
        entity_id[atom_flag] = residue_name[atom_flag]
        hetatm_entity = residue_name[atom_flag]
        ent_non_poly_id[hetatm_entity]++

        # First sighting of this residue name: walk the linked list from
        # its head (key " ") to its tail and append the new entity.
        if (ent_non_poly_id[hetatm_entity] == 1) {
            next_non_poly_id = ent_non_poly_point[" "]
            prev_non_poly_id = " "
            while (next_non_poly_id != "") {
                prev_non_poly_id = next_non_poly_id
                next_non_poly_id = ent_non_poly_point[prev_non_poly_id]
            }
            ent_non_poly_point[prev_non_poly_id] = hetatm_entity
            ent_non_poly_point[hetatm_entity] = ""
            ++num_non_poly_ents
            ent_non_poly_num[hetatm_entity] = num_non_poly_ents
        }

        # If the residue name already has an entity_seq_num entry that
        # does not exceed the polymer-entity count, file the atom under
        # its chain id instead of its residue name.
        if (entity_seq_num[residue_name[atom_flag]] != "" && \
            entity_seq_num[residue_name[atom_flag]]+0 <= num_poly_ents) \
            entity_id[atom_flag] = chain_id[atom_flag]
    }

    #
    # define _entities for polypeptide chains or DNA strands
    # ie these are _entity_poly. Done by checking for chain in chain_id
    # in ATOM records
    #
    if ($1 == "ATOM") {
        atom_entity = chain_id[atom_flag]
        ent_poly_id[atom_entity]++

        # First sighting of this chain: append it to the polymer list and
        # number it.
        if (ent_poly_id[atom_entity] == 1) {
            next_poly_id = ent_poly_point[" "]
            prev_poly_id = " "
            while (next_poly_id != "") {
                prev_poly_id = next_poly_id
                next_poly_id = ent_poly_point[prev_poly_id]
            }
            ent_poly_point[prev_poly_id] = atom_entity
            ent_poly_point[atom_entity] = ""
            ++num_poly_ents
            ent_poly_num[atom_entity] = num_poly_ents
            entity_seq_num[atom_entity] = num_poly_ents
            entities[num_poly_ents] = atom_entity
        }
    }

    ++atom_flag
}

#=====================================================================
# Keyword ANISOU
#
# Values are stored and presented as a separate looped list,
# unlike the PDB entry that has each ANISOU record following
# the corresponding atom.
#
# atom serial number    = matched via pointers to ATOM/HETATM
# atom type             = dropped, taken from ATOM/HETATM
# atom name             = dropped, taken from ATOM/HETATM
# alternate location    = dropped, taken from ATOM/HETATM
# residue name          = dropped, taken from ATOM/HETATM
# chain identifier      = dropped, taken from ATOM/HETATM
# residue sequence no.  = dropped, taken from ATOM/HETATM
# insertion code        = dropped, taken from ATOM/HETATM
#
#
#
# Note the different order
#         PDB        CIF
#   1.    U[1][1]    U[1][1]
#   2.    U[2][2]    U[1][2]
#   3.    U[3][3]    U[1][3]
#   4.    U[1][2]    U[2][2]
#   5.    U[1][3]    U[2][3]
#   6.    U[2][3]    U[3][3]
#
# Each ANISOU record gets the next aniso_flag slot; aniso_point maps the
# PDB atom serial number back to that slot.
#
$1 == "ANISOU" {

    # parse field
    ++aniso_flag

    a_atom_serial_number[aniso_flag] = substr($0, 7, 5)
    aniso_point[a_atom_serial_number[aniso_flag]] = aniso_flag

    a_atom_name[aniso_flag] = substr($0, 13, 4)
    a_atom_type[aniso_flag] = substr($0, 13, 2)
    a_atom_alt_location[aniso_flag] = substr($0, 17, 1)
    a_residue_name[aniso_flag] = substr($0, 18, 3)
    a_chain_id[aniso_flag] = substr($0, 22, 1)
    a_residue_seq_number[aniso_flag] = substr($0, 23, 4)

    # six 7-column tensor fields, stored in PDB order
    atom_U11[aniso_flag] = substr($0, 29, 7)
    atom_U22[aniso_flag] = substr($0, 36, 7)
    atom_U33[aniso_flag] = substr($0, 43, 7)
    atom_U12[aniso_flag] = substr($0, 50, 7)
    atom_U13[aniso_flag] = substr($0, 57, 7)
    atom_U23[aniso_flag] = substr($0, 64, 7)

    # blank alternate location becomes "?", blank chain becomes "."
    if (a_atom_alt_location[aniso_flag] == " ") {
        a_atom_alt_location[aniso_flag] = "?"
    }
    if (a_chain_id[aniso_flag] == " ") {
        a_chain_id[aniso_flag] = "."
    }
}

#====================================================================
# Keyword AUTHOR
#
# Designate first author as _audit_contact_author
# Loop over remaining authors as _audit_author
#
# The first AUTHOR record (author_flag still 1) prints the section
# banners; later records only add rows to the open _audit_author loop.
# A record ending in a comma leaves an empty last element after the
# split; that element is not printed.
#
$1 == "AUTHOR" {

    # parse record creating an array of authors
    text = substr($0, 11, 60)

    auth_convtext = "yes"
    # uncomment the next line if convtext control of typesetting desired
    # auth_convtext = "conditional"
    if (convtext == "yes") auth_convtext = "yes"
    if (auth_convtext == "yes") text = typeset(text)

    cont = substr($0, 9, 2)
    num_auth = split(text, authors, ",")

    # Normalise each name: drop surrounding blanks and join its words
    # with single spaces.
    for (i = 1; i <= num_auth; ++i) {
        num_a_split = split(authors[i], a_split, " ")
        authors[i] = (num_a_split > 0) ? a_split[1] : ""
        for (j = 2; j <= num_a_split; ++j) {
            authors[i] = (authors[i] " " a_split[j])
        }
    }

    if (author_flag == "1") {

        printf ("\n\n\n")
        printf ("########################\n")
        printf ("# #\n")
        printf ("# AUDIT_CONTACT_AUTHOR #\n")
        printf ("# #\n")
        printf ("########################\n\n\n")
        printf ("_audit_contact_author.name '%s'\n", authors[1])
        if (verbose) {
            printf ("_audit_contact_author.address ? \n")
            printf ("_audit_contact_author.phone ? \n")
            printf ("_audit_contact_author.fax ? \n")
            printf ("_audit_contact_author.email ? \n\n")
        }

        # Remaining authors of the first record open the loop.
        if (num_auth >= 2) {
            printf ("\n\n\n")
            printf ("####################\n")
            printf ("# #\n")
            printf ("# AUDIT_AUTHOR #\n")
            printf ("# #\n")
            printf ("####################\n\n\n")
            printf ("loop_ \n")
            printf ("_audit_author.name \n" )

            is_blank = authors[num_auth]
            # (is_blank == "") is 1 when the last element is empty, which
            # shortens the range by one.
            for (i = 2; i <= num_auth - (is_blank == ""); ++i) {
                printf ("'%s' \n", authors[i])
            }
        }

    } else {

        # Continuation record: every author is a loop row.
        is_blank = authors[num_auth]
        for (i = 1; i <= num_auth - (is_blank == ""); ++i) {
            printf ("'%s' \n", authors[i])
        }
    }

    ++author_flag
}

#======================================================================
# Keyword COMPND
#
# This is considered a common name for the macromolecule
# and = _chemical.name_common
#
# An effort is made to find the enzyme classification if it exists
# however it is not used at present since if it were a complex it
# would be an entity and represented as:
#     ent_name == _entity_name_systematic
#     ?        == _entity_name_systematic_system
# But what if it is the whole structure?
#
# COMPND text lines are collected in compnd[]; the data name is printed
# when the first one is seen.
#
$1 == "COMPND" {

    # localize compound name
    compnd[compnd_flag] = substr($0, 11, 60)
    if (convtext == "yes") {
        compnd[compnd_flag] = typeset(compnd[compnd_flag])
    }

    if (compnd_flag == "1") {
        printf ("\n_chemical.name_common \n")
    }
    ++compnd_flag
}

#
# The first non-COMPND record after a run of COMPND records flushes the
# collected lines as one semicolon-delimited text block and resets the
# counter.
#
compnd_flag != "1" && $1 != "COMPND" {
    printf ("; ")
    for (i = 1; i < compnd_flag; ++i) {
        printf (" %s\n", compnd[i])
    }
    printf ("; \n")
    compnd_flag = 1
}

# ec_start = index( ($0), "(E.C."
) # if (ec_start) { # ent_name = substr( ($0), ec_start, 14) # } #====================================================================== # Keyword CONECT # # Origin serial number = _struct_conn.ptnr1_label_res_id # = _struct_conn.ptnr1_label_asym_id # = _struct_conn.ptnr1_label_seq_id # = _struct_conn.ptnr1_label_atom_id # = _struct_conn.ptnr1_label_alt_id # Target serial numbers = _struct_conn.ptnr2_label_res_id # = _struct_conn.ptnr2_label_asym_id # = _struct_conn.ptnr2_label_seq_id # = _struct_conn.ptnr2_label_atom_id # = _struct_conn.ptnr2_label_alt_id # Hydrogen bond donor = _struct_conn.conn_type_id # Hydrogen bond acceptor = _struct_conn.conn_type_id # Salt bridge excess -ve = _struct_conn.conn_type_id # Salt bridge excess +ve = _struct_conn.conn_type_id # # _struct_conn.id = incremental number assigned to each # CONECT record # _struct_conn.conn_type_id = matches generic _struct_conn_type.criteria # # all atoms at 1_555 ie no support for -ve targets # No special details included # { if ($1 == "CONECT") { # Display geometry data items here if (conect_flag == "1") { if (verbose) { printf ("\n_geom.details ? \n") printf ("\nloop_\n") printf ("_geom_angle.atom_site_id_1 \n") printf ("_geom_angle.atom_site_id_2 \n") printf ("_geom_angle.atom_site_id_3 \n") printf ("_geom_angle.value \n") printf ("_geom_angle.site_symmetry_1 \n") printf ("_geom_angle.site_symmetry_2 \n") printf ("_geom_angle.site_symmetry_3 \n") printf ("_geom_angle.publ_flag \n") printf (" ? ? ? ? ? ? ? ? \n") printf ("\nloop_\n") printf ("_geom_bond.atom_site_id_1 \n") printf ("_geom_bond.atom_site_id_2 \n") printf ("_geom_bond.dist \n") printf ("_geom_bond.site_symmetry_1 \n") printf ("_geom_bond.site_symmetry_2 \n") printf ("_geom_bond.publ_flag \n") printf (" ? ? ? ? ? ? 
\n") printf ("\nloop_\n") printf ("_geom_contact.atom_site_id_1 \n") printf ("_geom_contact.atom_site_id_2 \n") printf ("_geom_contact.dist \n") printf ("_geom_contact.publ_flag \n") printf ("_geom_contact.site_symmetry_1 \n") printf ("_geom_contact.site_symmetry_2 \n") printf (" ? ? ? ? ? ? \n") printf ("\nloop_\n") printf ("_geom_torsion.atom_site_id_1 \n") printf ("_geom_torsion.atom_site_id_2 \n") printf ("_geom_torsion.atom_site_id_3 \n") printf ("_geom_torsion.atom_site_id_4 \n") printf ("_geom_torsion.value \n") printf ("_geom_torsion.publ_flag \n") printf ("_geom_torsion.site_symmetry_1 \n") printf ("_geom_torsion.site_symmetry_2 \n") printf ("_geom_torsion.site_symmetry_3 \n") printf ("_geom_torsion.site_symmetry_4 \n") printf (" ? ? ? ? ? ? ? ? ? ? \n") } printf ("\n\n\n") printf ("##############################\n") printf ("# #\n") printf ("# STRUCT_CONN_TYPE #\n") printf ("# #\n") printf ("##############################\n\n\n") printf ("\nloop_\n") printf ("_struct_conn_type.id\n") printf ("_struct_conn_type.criteria\n") printf ("_struct_conn_type.reference\n") printf (" . 
'unknown bond type from PDB file' ?\n") printf ("saltbr 'defined by user in PDB file' ?\n") printf ("hydrog 'defined by user in PDB file' ?\n") if (ssbond_flag > 1) { printf ("disulf 'defined by user in PDB file' ?\n") } printf ("\n\n\n") printf ("##############################\n") printf ("# #\n") printf ("# STRUCT_CONN #\n") printf ("# #\n") printf ("##############################\n\n\n") printf ("\nloop_\n") printf ("_struct_conn.id\n") printf ("_struct_conn.conn_type_id\n") printf ("_struct_conn.ptnr1_label_res_id\n") printf ("_struct_conn.ptnr1_label_asym_id\n") printf ("_struct_conn.ptnr1_label_seq_id\n") printf ("_struct_conn.ptnr1_label_atom_id\n") printf ("_struct_conn.ptnr1_label_alt_id\n") printf ("_struct_conn.ptnr1_role\n") printf ("_struct_conn.ptnr1_symmetry\n") printf ("_struct_conn.ptnr2_label_res_id\n") printf ("_struct_conn.ptnr2_label_asym_id\n") printf ("_struct_conn.ptnr2_label_seq_id\n") printf ("_struct_conn.ptnr2_label_atom_id\n") printf ("_struct_conn.ptnr2_label_alt_id\n") printf ("_struct_conn.ptnr2_role\n") printf ("_struct_conn.ptnr2_symmetry\n") printf ("_struct_conn.details\n") } # parse field conect_source = substr( ($0), 7, 5) conect_target_1 = substr( ($0), 12, 5) conect_target_2 = substr( ($0), 17, 5) conect_target_3 = substr( ($0), 22, 5) conect_target_4 = substr( ($0), 27, 5) conect_h_accept_1 = substr( ($0), 32, 5) conect_h_accept_2 = substr( ($0), 37, 5) conect_salt_positive = substr( ($0), 42, 5) conect_h_donor_1 = substr( ($0), 47, 5) conect_h_donor_2 = substr( ($0), 52, 5) conect_salt_negative = substr( ($0), 57, 5) conect_spec_detail = "." # First unknown bond if (conect_target_1 != " " && \ conect_target_1 != "") { # conect_type = " . 
" conect_ptnr1_label_res_id = residue_con[conect_source] conect_ptnr1_label_asym_id = chain_con[conect_source] conect_ptnr1_label_seq_id = residue_seq_con[conect_source] conect_ptnr1_label_atom_id = atom_con[conect_source] conect_ptnr1_label_alt_id = atom_alt_con[conect_source] conect_ptnr1_role = "?" conect_ptnr1_symmetry = "1_555" conect_ptnr2_label_res_id = residue_con[conect_target_1] conect_ptnr2_label_asym_id = chain_con[conect_target_1] conect_ptnr2_label_seq_id = residue_seq_con[conect_target_1] conect_ptnr2_label_atom_id = atom_con[conect_target_1] conect_ptnr2_label_alt_id = atom_alt_con[conect_target_1] conect_ptnr2_role = "?" conect_ptnr2_symmetry = "1_555" printf \ ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n",\ conect_flag, conect_type, conect_ptnr1_label_res_id, \ conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \ conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \ conect_ptnr1_role, conect_ptnr1_symmetry, \ conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \ conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \ conect_ptnr2_label_alt_id, \ conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail ) ++conect_flag } # Second unknown bond if (conect_target_2 != " " && \ conect_target_2 != "") { conect_type = " . " conect_ptnr1_label_res_id = residue_con[conect_source] conect_ptnr1_label_asym_id = chain_con[conect_source] conect_ptnr1_label_seq_id = residue_seq_con[conect_source] conect_ptnr1_label_atom_id = atom_con[conect_source] conect_ptnr1_label_alt_id = atom_alt_con[conect_source] conect_ptnr1_role = "?" conect_ptnr1_symmetry = "1_555" conect_ptnr2_label_res_id = residue_con[conect_target_2] conect_ptnr2_label_asym_id = chain_con[conect_target_2] conect_ptnr2_label_seq_id = residue_seq_con[conect_target_2] conect_ptnr2_label_atom_id = atom_con[conect_target_2] conect_ptnr2_label_alt_id = atom_alt_con[conect_target_2] conect_ptnr2_role = "?" 
conect_ptnr2_symmetry = "1_555" printf\ ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n",\ conect_flag, conect_type, conect_ptnr1_label_res_id, \ conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \ conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \ conect_ptnr1_role, conect_ptnr1_symmetry, \ conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \ conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \ conect_ptnr2_label_alt_id, \ conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail ) ++conect_flag } # Third unknown bond if (conect_target_3 != " " && \ conect_target_3 != "") { conect_type = " . " conect_ptnr1_label_res_id = residue_con[conect_source] conect_ptnr1_label_asym_id = chain_con[conect_source] conect_ptnr1_label_seq_id = residue_seq_con[conect_source] conect_ptnr1_label_atom_id = atom_con[conect_source] conect_ptnr1_label_alt_id = atom_alt_con[conect_source] conect_ptnr1_role = "?" conect_ptnr1_symmetry = "1_555" conect_ptnr2_label_res_id = residue_con[conect_target_3] conect_ptnr2_label_asym_id = chain_con[conect_target_3] conect_ptnr2_label_seq_id = residue_seq_con[conect_target_3] conect_ptnr2_label_atom_id = atom_con[conect_target_3] conect_ptnr2_label_alt_id = atom_alt_con[conect_target_3] conect_ptnr2_role = "?" conect_ptnr2_symmetry = "1_555" printf \ ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \ conect_flag, conect_type, conect_ptnr1_label_res_id, \ conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \ conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \ conect_ptnr1_role, conect_ptnr1_symmetry, \ conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \ conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \ conect_ptnr2_label_alt_id, \ conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail ) ++conect_flag } # Fourth unknown bond if (conect_target_4 != " " && \ conect_target_4 != "") { conect_type = " . 
" conect_ptnr1_label_res_id = residue_con[conect_source] conect_ptnr1_label_asym_id = chain_con[conect_source] conect_ptnr1_label_seq_id = residue_seq_con[conect_source] conect_ptnr1_label_atom_id = atom_con[conect_source] conect_ptnr1_label_alt_id = atom_alt_con[conect_source] conect_ptnr1_role = "?" conect_ptnr1_symmetry = "1_555" conect_ptnr2_label_res_id = residue_con[conect_target_4] conect_ptnr2_label_asym_id = chain_con[conect_target_4] conect_ptnr2_label_seq_id = residue_seq_con[conect_target_4] conect_ptnr2_label_atom_id = atom_con[conect_target_4] conect_ptnr2_label_alt_id = atom_alt_con[conect_target_4] conect_ptnr2_role = "?" conect_ptnr2_symmetry = "1_555" printf \ ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \ conect_flag, conect_type, conect_ptnr1_label_res_id, \ conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \ conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \ conect_ptnr1_role, conect_ptnr1_symmetry, \ conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \ conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \ conect_ptnr2_label_alt_id, \ conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail ) ++conect_flag } # First H bond acceptor if (conect_h_accept_1 != " " && \ conect_h_accept_1 != "") { conect_type = "hydrog" conect_ptnr1_label_res_id = residue_con[conect_source] conect_ptnr1_label_asym_id = chain_con[conect_source] conect_ptnr1_label_seq_id = residue_seq_con[conect_source] conect_ptnr1_label_atom_id = atom_con[conect_source] conect_ptnr1_label_alt_id = atom_alt_con[conect_source] conect_ptnr1_role = "don" conect_ptnr1_symmetry = "1_555" conect_ptnr2_label_res_id = residue_con[conect_h_accept_1] conect_ptnr2_label_asym_id = chain_con[conect_h_accept_1] conect_ptnr2_label_seq_id = residue_seq_con[conect_h_accept_1] conect_ptnr2_label_atom_id = atom_con[conect_h_accept_1] conect_ptnr2_label_alt_id = atom_alt_con[conect_h_accept_1] conect_ptnr2_role = "acc" conect_ptnr2_symmetry = "1_555" 
printf \ ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \ conect_flag, conect_type, conect_ptnr1_label_res_id, \ conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \ conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \ conect_ptnr1_role, conect_ptnr1_symmetry, \ conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \ conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \ conect_ptnr2_label_alt_id, \ conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail ) ++conect_flag } # Second H bond acceptor if (conect_h_accept_2 != " " && \ conect_h_accept_2 != "") { conect_type = "hydrog" conect_ptnr1_label_res_id = residue_con[conect_source] conect_ptnr1_label_asym_id = chain_con[conect_source] conect_ptnr1_label_seq_id = residue_seq_con[conect_source] conect_ptnr1_label_atom_id = atom_con[conect_source] conect_ptnr1_label_alt_id = atom_alt_con[conect_source] conect_ptnr1_role = "don" conect_ptnr1_symmetry = "1_555" conect_ptnr2_label_res_id = residue_con[conect_h_accept_2] conect_ptnr2_label_asym_id = chain_con[conect_h_accept_2] conect_ptnr2_label_seq_id = residue_seq_con[conect_h_accept_2] conect_ptnr2_label_atom_id = atom_con[conect_h_accept_2] conect_ptnr2_label_alt_id = atom_alt_con[conect_h_accept_2] conect_ptnr2_role = "acc" conect_ptnr2_symmetry = "1_555" printf \ ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \ conect_flag, conect_type, conect_ptnr1_label_res_id, \ conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \ conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \ conect_ptnr1_role, conect_ptnr1_symmetry, \ conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \ conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \ conect_ptnr2_label_alt_id, \ conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail ) ++conect_flag } # First H bond donor if (conect_h_donor_1 != " " && \ conect_h_donor_1 != "") { conect_type = "hydrog" conect_ptnr1_label_res_id = residue_con[conect_source] 
conect_ptnr1_label_asym_id = chain_con[conect_source]
conect_ptnr1_label_seq_id = residue_seq_con[conect_source]
conect_ptnr1_label_atom_id = atom_con[conect_source]
conect_ptnr1_label_alt_id = atom_alt_con[conect_source]
conect_ptnr1_role = "acc"
conect_ptnr1_symmetry = "1_555"
conect_ptnr2_label_res_id = residue_con[conect_h_donor_1]
conect_ptnr2_label_asym_id = chain_con[conect_h_donor_1]
conect_ptnr2_label_seq_id = residue_seq_con[conect_h_donor_1]
conect_ptnr2_label_atom_id = atom_con[conect_h_donor_1]
conect_ptnr2_label_alt_id = atom_alt_con[conect_h_donor_1]
conect_ptnr2_role = "don"
conect_ptnr2_symmetry = "1_555"
printf \
("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \
  conect_flag, conect_type, conect_ptnr1_label_res_id, \
  conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \
  conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \
  conect_ptnr1_role, conect_ptnr1_symmetry, \
  conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \
  conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \
  conect_ptnr2_label_alt_id, \
  conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail )
++conect_flag
}

# Second H bond donor
#
# The source atom is the acceptor (partner 1), the listed atom the donor
# (partner 2).  Partner 1 is looked up in the same per-atom tables as in
# every other bond type: residue_seq_con[] for the sequence id and
# atom_con[] for the atom name.  (This branch previously read
# residue_con[] and atom_seq_con[] for those two fields, which put the
# residue name in the seq_id column and an empty value in the atom_id
# column.)
if (conect_h_donor_2 != " " && \
    conect_h_donor_2 != "") {
  conect_type = "hydrog"
  conect_ptnr1_label_res_id = residue_con[conect_source]
  conect_ptnr1_label_asym_id = chain_con[conect_source]
  conect_ptnr1_label_seq_id = residue_seq_con[conect_source]
  conect_ptnr1_label_atom_id = atom_con[conect_source]
  conect_ptnr1_label_alt_id = atom_alt_con[conect_source]
  conect_ptnr1_role = "acc"
  conect_ptnr1_symmetry = "1_555"
  conect_ptnr2_label_res_id = residue_con[conect_h_donor_2]
  conect_ptnr2_label_asym_id = chain_con[conect_h_donor_2]
  conect_ptnr2_label_seq_id = residue_seq_con[conect_h_donor_2]
  conect_ptnr2_label_atom_id = atom_con[conect_h_donor_2]
  conect_ptnr2_label_alt_id = atom_alt_con[conect_h_donor_2]
  conect_ptnr2_role = "don"
  conect_ptnr2_symmetry = "1_555"
  printf \
  ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \
    conect_flag, conect_type, conect_ptnr1_label_res_id, \
    conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \
    conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \
    conect_ptnr1_role, conect_ptnr1_symmetry, \
    conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \
    conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \
    conect_ptnr2_label_alt_id, \
    conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail )
  ++conect_flag
}

# Salt Bridge - access of +ve charge
# (source atom carries the negative charge, the listed atom the positive)
if (conect_salt_positive != " " && \
    conect_salt_positive != "") {
  conect_type = "saltbr"
  conect_ptnr1_label_res_id = residue_con[conect_source]
  conect_ptnr1_label_asym_id = chain_con[conect_source]
  conect_ptnr1_label_seq_id = residue_seq_con[conect_source]
  conect_ptnr1_label_atom_id = atom_con[conect_source]
  conect_ptnr1_label_alt_id = atom_alt_con[conect_source]
  conect_ptnr1_role = "-ve"
  conect_ptnr1_symmetry = "1_555"
  conect_ptnr2_label_res_id = residue_con[conect_salt_positive]
  conect_ptnr2_label_asym_id = chain_con[conect_salt_positive]
  conect_ptnr2_label_seq_id = residue_seq_con[conect_salt_positive]
  conect_ptnr2_label_atom_id = atom_con[conect_salt_positive]
  conect_ptnr2_label_alt_id = atom_alt_con[conect_salt_positive]
  conect_ptnr2_role = "+ve"
  conect_ptnr2_symmetry = "1_555"
  printf \
  ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \
    conect_flag, conect_type, conect_ptnr1_label_res_id, \
    conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \
    conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \
    conect_ptnr1_role, conect_ptnr1_symmetry, \
    conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \
    conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \
    conect_ptnr2_label_alt_id, \
    conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail )
  ++conect_flag
}

# Salt Bridge - access of -ve charge
if (conect_salt_negative != " " && \
    conect_salt_negative != "") {
  conect_type = "saltbr"
  conect_ptnr1_label_res_id = residue_con[conect_source]
conect_ptnr1_label_asym_id = chain_con[conect_source]
  conect_ptnr1_label_seq_id = residue_seq_con[conect_source]
  conect_ptnr1_label_atom_id = atom_con[conect_source]
  conect_ptnr1_label_alt_id = atom_alt_con[conect_source]
  conect_ptnr1_role = "+ve"
  conect_ptnr1_symmetry = "1_555"
  conect_ptnr2_label_res_id = residue_con[conect_salt_negative]
  conect_ptnr2_label_asym_id = chain_con[conect_salt_negative]
  conect_ptnr2_label_seq_id = residue_seq_con[conect_salt_negative]
  conect_ptnr2_label_atom_id = atom_con[conect_salt_negative]
  conect_ptnr2_label_alt_id = atom_alt_con[conect_salt_negative]
  conect_ptnr2_role = "-ve"
  conect_ptnr2_symmetry = "1_555"
  printf \
  ("%3s %6s %3s %1s %4s %4s %2s %3s %5s %3s %1s %4s %4s %2s %3s %5s %1s\n", \
    conect_flag, conect_type, conect_ptnr1_label_res_id, \
    conect_ptnr1_label_asym_id, conect_ptnr1_label_seq_id, \
    conect_ptnr1_label_atom_id, conect_ptnr1_label_alt_id, \
    conect_ptnr1_role, conect_ptnr1_symmetry, \
    conect_ptnr2_label_res_id, conect_ptnr2_label_asym_id, \
    conect_ptnr2_label_seq_id, conect_ptnr2_label_atom_id, \
    conect_ptnr2_label_alt_id, \
    conect_ptnr2_role, conect_ptnr2_symmetry,conect_spec_detail )
  ++conect_flag
}
++conect_flag_2
}
}

#====================================================================
# Keyword EXPDTA
#
#   expdta [11-70] = _exptl.method
#
# The method text is written immediately, left-justified in a 60-column
# field between single quotes.
{
if ($1 == "EXPDTA") {
  # parse field
  expdta = substr( ($0), 11, 60)
  # "%-60s" (not "%-s60"): the old format printed the value followed by
  # a literal "60" instead of padding it to 60 columns.
  printf (" _exptl.method ' %-60s '\n", expdta)
}
}

#======================================================================
# Keyword FORMUL - chemical formula of non-standard groups
# (No continuation records for FORMUL found in July 93 PDB, therefore
# ignored)
#
#   component number  == _entity.id & _chem_comp.entity_id
#   het identifier    == _entity_name_common & _chem_comp.id
#   het_formula_mw    == ignored
#   het_formula_text  == _chem_comp.formula
#   ??                == entity_special_details
#
# Information written in ATOM/HETATM keyword
#
# Each FORMUL record is stored at index formul_flag, and the het group is
# appended to the ent_non_poly_point[] chain (a linked list keyed by het
# identifier, starting at key " ") the first time it is seen.
{
if ($1 == "FORMUL") {
  # parse field
  formul_het_number[formul_flag] = substr( ($0), 9, 2)+0
  formul_het_site_symbol[formul_flag] = substr( ($0), 13, 3)
  formul_het_cont_flag[formul_flag] = substr( ($0), 17, 2)
  hetatm_entity = substr( ($0), 13, 3)
  entity_seq_num[formul_het_site_symbol[formul_flag]] = \
    formul_het_number[formul_flag]+0
  if ( substr( ($0), 18, 1) == " " ) {
    # first (or only) record for this het group
    formul_het_text[formul_flag] = substr( ($0), 20,51)
  } else {
    # continuation record: build a multi-line text value.
    # NOTE(review): formul_flag was already advanced by the previous
    # record, so this appends to the new slot rather than the previous
    # text -- consistent with "continuation ignored" above; confirm
    # before relying on it.
    if ( substr( formul_het_text[formul_flag], 1, 1) != "\n" ) {
      formul_het_text[formul_flag] = ("\n; " \
        formul_het_text[formul_flag] "\n " substr( ($0), 20,52) \
        "\n")
    } else {
      formul_het_text[formul_flag] = ( \
        formul_het_text[formul_flag] substr( ($0), 20,52) "\n")
    }
  }
  hetatm_entity = formul_het_site_symbol[formul_flag]
  ent_non_poly_id[hetatm_entity]++
  if (ent_non_poly_id[hetatm_entity] == 1) {
    # walk to the end of the chain and link the new het group there
    next_non_poly_id=ent_non_poly_point[" "]
    prev_non_poly_id=" "
    while(next_non_poly_id != "") {
      prev_non_poly_id = next_non_poly_id
      next_non_poly_id = ent_non_poly_point[prev_non_poly_id]
    }
    ent_non_poly_point[prev_non_poly_id] = hetatm_entity
    ent_non_poly_point[hetatm_entity] = ""
    ++num_non_poly_ents
    ent_non_poly_num[hetatm_entity] = \
      formul_het_number[formul_flag]
  }
  ++formul_flag
  # Set up to read additional entities from ATOM records
  (entity_flag = formul_flag - 1)
}
}

#=========================================================================
# keyword FTNOTE -- footnote to atoms or residues
#
#   footnote number == _atom_sites_footnote.id
#   footnote text   == _atom_sites_footnote.text
{
if ($1 == "FTNOTE" ) {
  x = substr ( ($0), 10, 1)
  if (ftnote_flag[1] == "0" ) {
    ft_save[++foot_flag]="\nloop_\n"
    ft_save[++foot_flag]="_atom_sites_footnote.id \n"
    ft_save[++foot_flag]="_atom_sites_footnote.text \n"
  }
  ftnote_num = substr ( ($0), 10, 1)
  ftnote_text = substr ( ($0), 12, 59)
  if (ftnote_num > ftnote_flag_old ) {
    ft_save[++foot_flag]="; \n"
  }
  if (ftnote_flag[ftnote_num] == "0") {
ft_save[++foot_flag]=(" " ftnote_num "\n")
  }
  if (ftnote_flag[ftnote_num] == "0") {
    ft_save[++foot_flag]=("; " ftnote_text "\n")
    ++ftnote_flag[ftnote_num]
  } else {
    ft_save[++foot_flag]=(" " ftnote_text "\n")
    ++ftnote_flag[ftnote_num]
  }
  ftnote_flag_old = ftnote_num
}
}

#====================================================================
# Keyword HEADER
#
# This is a good place to place the _struct_biol data items. Templates
# are given but no information has been parsed excluding
# _special_details.
#
#   head_funct_class [11-50] == _struct_biol.details
#   head_dep_date    [51-59] == _database_PDB_rev.date_original
#                               _audit.creation_date
#   head_PDB_code    [63-66] == _database_2.database_code
#                               _struct_biol.id
#                               _audit_revision_id
{
if ($1 == "HEADER") {
  head_funct_class = substr ( ($0), 11, 40)
  head_dep_date = substr ( ($0), 51, 9)
  head_PDB_code = substr ( ($0), 63, 4)
  #
  # Output the PDB code immediately as the data block name
  #
  printf("data_%4s\n",head_PDB_code)
  #
  # save the header id as a possible audit.revision_id
  #
  aud_rev_id = head_PDB_code
}
}

#====================================================================
# Keyword KEYWRD
#
#   keywrd_list [11-70] == _struct_keywords.text
#   from HEADER:
#   head_PDB_code       == _struct_keywords.entry_id
#
# Output is buffered in key_save[]; the banner and loop_ header are
# queued once, on the first KEYWRD record.
{
if ($1 == "KEYWRD") {
  keywrd_list = substr ( ($0), 11, 60)
  #
  num_key = split( keywrd_list, key_list, "," )
  if ( keywrd_flag == 0 ) {
    key_save[++keywrd_flag] = \
      "\n\n\n"
    key_save[++keywrd_flag] = \
      "#############################\n"
    key_save[++keywrd_flag] = \
      "# #\n"
    key_save[++keywrd_flag] = \
      "# STRUCT_KEYWORDS #\n"
    key_save[++keywrd_flag] = \
      "# #\n"
    key_save[++keywrd_flag] = \
      "#############################\n\n"
    key_save[++keywrd_flag] = \
      "loop_\n"
    key_save[++keywrd_flag] = \
      "_struct_keywords.entry_id\n"
    key_save[++keywrd_flag] = \
      "_struct_keywords.text\n"
  }
  for (i = 1; i <= num_key; ++i) {
    # re-join the words of each keyword with single blanks
    num_el = split (key_list[i], key_el, " ")
    # an all-blank element (e.g. after a trailing comma) is not a
    # keyword; skip it rather than emit an empty '' row
    if (num_el == 0) continue
    key_list[i] = key_el[1]
    for (j = 2; j <= num_el; ++j) {
      key_list[i] = (key_list[i] " " key_el[j])
    }
    key_save[++keywrd_flag] = \
      sprintf( "%4s '%s'\n", head_PDB_code, key_list[i] )
  }
}
}

#=======================================================================
# Keyword HELIX
#
#    8 - 10  helix_no.           == (not used)
#   12 - 14  helix_id            == _struct_conf.id
#   16 - 18  helix_res_name_beg  == _struct_conf.beg_label_comp_id
#   20       helix_chain_id_beg  == _struct_conf.beg_label_asym_id
#   22 - 26  helix_res_seq_beg   == _struct_conf.beg_label_seq_id
#   28 - 30  helix_res_name_end  == _struct_conf.end_label_comp_id
#   32       helix_chain_id_end  == _struct_conf.end_label_asym_id
#   34 - 38  helix_res_seq_end   == _struct_conf.end_label_seq_id
#   39 - 40  helix_class         == _struct_conf.conf_type_id
#   41 - 70  helix_comment       == _struct_conf.details
#
# note helix classes 9 and 10 as defined by the PDB do not have CIF
# definitions
#
# Nothing is printed here; the parsed values are stored at index ss_flag.
{
if ($1 == "HELIX") {
  helix_no[ss_flag] = substr( ($0), 8, 3)
  helix_id[ss_flag] = substr( ($0),12, 3)
  helix_res_name_beg[ss_flag] = substr( ($0),16, 3)
  helix_chain_id_beg[ss_flag] = substr( ($0),20, 1)
  helix_res_seq_beg[ss_flag] = substr( ($0),22, 5)
  helix_res_name_end[ss_flag] = substr( ($0),28, 3)
  helix_chain_id_end[ss_flag] = substr( ($0),32, 1)
  helix_res_seq_end[ss_flag] = substr( ($0),34, 5)
  helix_class[ss_flag] = substr( ($0),39, 2)
  helix_comment[ss_flag] = substr( ($0),41,30)

  # Numeric helix class, used both for the default comment and for the
  # conf_type_id below so the two always agree.
  h_class_num = helix_class[ss_flag]+0

  # Supply a default comment when the field is empty or all blanks.
  # The regexp is width-independent, so it also covers records that were
  # trimmed short of column 70.
  if (helix_comment[ss_flag] ~ /^ *$/) {
    helix_comment[ss_flag] = " . "
    if (h_class_num == 1) \
      helix_comment[ss_flag] = "RIGHT-HANDED ALPHA HELIX"
    if (h_class_num == 2) \
      helix_comment[ss_flag] = "RIGHT-HANDED OMEGA HELIX"
    if (h_class_num == 3) \
      helix_comment[ss_flag] = "RIGHT-HANDED PI HELIX"
    if (h_class_num == 4) \
      helix_comment[ss_flag] = "RIGHT-HANDED GAMMA HELIX"
    if (h_class_num == 5) \
      helix_comment[ss_flag] = "RIGHT-HANDED 3/10 HELIX"
    if (h_class_num == 6) \
      helix_comment[ss_flag] = "LEFT-HANDED ALPHA HELIX"
    if (h_class_num == 7) \
      helix_comment[ss_flag] = "LEFT-HANDED OMEGA HELIX"
    if (h_class_num == 8) \
      helix_comment[ss_flag] = "LEFT-HANDED GAMMA HELIX"
    if (h_class_num == 9) \
      helix_comment[ss_flag] = "2/7 RIBBON/HELIX"
    if (h_class_num == 10) \
      helix_comment[ss_flag] = "POLYPROLINE"
  }

  # strip blanks from id
  num_x = split(helix_id[ss_flag],xxx," ")
  helix_id[ss_flag] = ""
  if (num_x == 1) helix_id[ss_flag] = xxx[1]
  if (num_x == 2) helix_id[ss_flag] = (xxx[1] "_" xxx[2])

  # provide default conditions
  if (helix_chain_id_beg[ss_flag] == " ") \
    helix_chain_id_beg[ss_flag] = "."
  if (helix_chain_id_end[ss_flag] == " ") \
    helix_chain_id_end[ss_flag] = "."

  # give real names to helix classes
  # suffix is "_N" when either end residue appears in na_list, else "_P"
  h_class_suffix = "_P"
  num_x = split( (helix_res_name_end[ss_flag] " " \
    helix_res_name_beg[ss_flag]), xxx, " ")
  for(i in na_list) {
    if (na_list[i] == xxx[1] || \
        na_list[i] == xxx[2] ) h_class_suffix = "_N"
  }
  # An unrecognised class leaves the raw two-column field untouched.
  if (h_class_num == 1) \
    helix_class[ss_flag] = ("HELX_RH_AL" h_class_suffix)
  else if (h_class_num == 2) \
    helix_class[ss_flag] = ("HELX_RH_OM" h_class_suffix)
  else if (h_class_num == 3) \
    helix_class[ss_flag] = ("HELX_RH_PI" h_class_suffix)
  else if (h_class_num == 4) \
    helix_class[ss_flag] = ("HELX_RH_GA" h_class_suffix)
  else if (h_class_num == 5) \
    helix_class[ss_flag] = ("HELX_RH_3T" h_class_suffix)
  else if (h_class_num == 6) \
    helix_class[ss_flag] = ("HELX_LH_AL" h_class_suffix)
  else if (h_class_num == 7) \
    helix_class[ss_flag] = ("HELX_LH_OM" h_class_suffix)
  else if (h_class_num == 8) \
    helix_class[ss_flag] = ("HELX_LH_GA" h_class_suffix)
  else if (h_class_num == 9) \
    helix_class[ss_flag] = ("HELX_2_7" h_class_suffix)
  else if (h_class_num == 10) \
    helix_class[ss_flag] = ("HELX_PP" h_class_suffix)
  ++ss_flag
  ++helix_flag
}
}

#===================================================================
# Keyword HET
#
#   het_site_symbol      == to be checked against _entity.id from FORMUL
#   number of atoms      == _entity_number_of_atoms
#   het_site_chain       == ????
#   het_atoms_text       == _chem_comp.number_atoms_nh
#   heterogen identifier == _entity_formula
#   ?                    == _entity_source
#
# Nothing is written here..written in ATOM/HETATM keyword
#
{
if ($1 == "HET") {
  # parse field
  het_site_symbol[het_flag] = substr( ($0), 8, 3)
  het_site_chain[het_flag] = substr( ($0), 13, 1)
  het_site_residue[het_flag] = substr( ($0), 14, 4)
  het_atoms_number[het_flag] = substr( ($0), 21, 5)
  het_atoms_text[het_flag] = substr( ($0), 31,40)
  if ( het_site_chain[het_flag] == " ") {
    het_site_chain[het_flag] = "."
}
  ++het_flag
}
}

#==================================================================
# Keyword JRNL
#
# As defined by the PDB, this is the primary citation that matches the
# given coordinate set. It is written before the REMARK 2 record
#
#   "primary"             = _citation.id
#                         = _citation_author.citation_id
#   "yes"/"no"            = _citation.coordinate_linkage
#   jrnl_rec_type [13-16] =
#   jrnl_cont     [17-18] =
#
# AUTH
#   jrnl_auth     [20-70] = _citation_author.name
#
# TITL
#   jrnl_titl     [20-70] = _citation.title
#
# REF
#   jrnl_ref_jour [20-47] = _citation.journal_abbrev (this is not
#                           always abbreviated but it will do)
#   jrnl_ref_vol  [53-55] = _citation.journal_volume
#   "?"                   = _citation.journal_issue
#   jrnl_ref_page [57-61] = _citation.page_first
#   "?"                   = _citation.page_last
#   jrnl_ref_year [63-66] = _citation.year
#
# PUBL
#   jrnl_pub_pub  [20-70] = _citation.book_publisher
#
# REFN
#   jrnl_astm     [25-30] = _citation.journal_coden_ASTM or
#                         = _citation.book_coden_ASTM
#   jrnl_country  [33-34] = _citation.country
#   jrnl_isbm     [41-65] = _citation.journal_coden_ISSN or
#                         = _citation.book_coden_ISSN
#   "?"                   = _citation.abstract
#
#                         = _citation.details
# Ohhh for a multi-dimensional array
#
# Nothing is printed here: the pieces are stored under citation index 1
# (cit_flag) in the cit_*/jour_*/volu/page/year/... arrays.
{
if ($1 == "JRNL") {
  jrnl_rec_type = substr( ($0), 13, 4)
  jrnl_cont = substr( ($0), 17, 2)
  jrnl_title = substr( ($0), 20, 51)
  jrnl_auth = substr( ($0), 20, 51)
  text = substr( ($0), 20, 51)
  if (convtext == "yes") \
    text=\
    typeset(text)
  jrnl_title = text
  jrnl_auth = text
  cit_flag = 1
  primary = "yes"

  # The continuation field is two columns wide; the first record of each
  # sub-type has it blank.  Test for "all blanks" with a regexp so the
  # check does not depend on the exact width of the field.
  jrnl_first = (jrnl_cont ~ /^ *$/)

  if (jrnl_rec_type == "TITL") {
    if (jrnl_first)        cit_title_1[cit_flag] = jrnl_title
    if (jrnl_cont == " 2") cit_title_2[cit_flag] = jrnl_title
    if (jrnl_cont == " 3") cit_title_3[cit_flag] = jrnl_title
    if (jrnl_cont == " 4") cit_title_4[cit_flag] = jrnl_title
    if (jrnl_cont == " 5") cit_title_5[cit_flag] = jrnl_title
    if (jrnl_cont == " 6") cit_title_6[cit_flag] = jrnl_title
  }

  if (jrnl_rec_type == "AUTH") {
    if (jrnl_first)        cit_auth_1[cit_flag] = jrnl_auth
    if (jrnl_cont == " 2") cit_auth_2[cit_flag] = jrnl_auth
    if (jrnl_cont == " 3") cit_auth_3[cit_flag] = jrnl_auth
    if (jrnl_cont == " 4") cit_auth_4[cit_flag] = jrnl_auth
  }

  # Assume a max. of 3 REF records
  if (jrnl_rec_type == "REF ") {
    if (jrnl_first) {
      jour_1[cit_flag] = substr ( ($0), 20, 28)
      volu[cit_flag] = substr ( ($0), 53, 3)
      page[cit_flag] = substr ( ($0), 57, 5)
      year[cit_flag] = substr ( ($0), 63, 4)
      # "?" publisher marks this citation as a journal, not a book
      jrnl_pub_pub_1[cit_flag] = "?"
    }
    if (jrnl_cont == " 2") {
      jour_2[cit_flag] = substr ( ($0), 20, 28)
    }
    if (jrnl_cont == " 3") {
      jour_3[cit_flag] = substr ( ($0), 20, 28)
    }
  }

  # Assume a max. of 4 PUBL records
  if (jrnl_rec_type == "PUBL") {
    if (jrnl_first) {
      jrnl_pub_pub[cit_flag] = substr ( ($0), 20, 51)
      jour_1[cit_flag] = "?"
      volu[cit_flag] = "?"
      page[cit_flag] = "?"
      year[cit_flag] = "?"
      jrnl_pub_pub_1[cit_flag] = substr ( ($0), 20, 51)
    }
    if (jrnl_cont == " 2") {
      jrnl_pub_pub_2[cit_flag] = substr ( ($0), 20, 51)
    }
    if (jrnl_cont == " 3") {
      jrnl_pub_pub_3[cit_flag] = substr ( ($0), 20, 51)
    }
  }

  if (jrnl_rec_type == "REFN") {
    astm[cit_flag] = substr ( ($0), 25, 6)
    country[cit_flag] = substr ( ($0), 33, 2)
    issn[cit_flag] = substr ( ($0), 41, 25)
  }
}
}

#=============================================================================
# Keyword MASTER
#
# (used in END statement)
#
# Stores the record totals and appends one row per stored SSBOND entry
# (indices 1 .. ssbond_flag-1) to the connection list being printed.
{
if ($1 == "MASTER") {
  # parse totals
  total_remark = substr( ($0), 11, 5)
  total_ftnote = substr( ($0), 16, 5)
  total_het = substr( ($0), 21, 5)
  total_helix = substr( ($0), 26, 5)
  total_sheet = substr( ($0), 31, 5)
  total_turn = substr( ($0), 36, 5)
  total_site = substr( ($0), 41, 5)
  total_o_s_m = substr( ($0), 46, 5)
  total_a_h = substr( ($0), 51, 5)
  total_ter = substr( ($0), 56, 5)
  total_conect = substr( ($0), 61, 5)
  total_seqres = substr( ($0), 66, 5)
  #
  # add disulphides if present to CONNCT list
  #
  if (ssbond_flag > 1 ) {
    for (i=1; i < ssbond_flag; ++i) {
      printf ("%3s disulf %3s %1s %4s ? ? ? 1_555", \
        conect_flag, ssbond_res_name_beg[i], \
        ssbond_chain_id_beg[i], ssbond_res_seq_num_beg[i])
      printf (" %3s %1s %4s ? ? ? 1_555\n",\
        ssbond_res_name_end[i], ssbond_chain_id_end[i], \
        ssbond_res_seq_num_end[i] )
      printf("%s\n", ssbond_comment[i])
      ++conect_flag
    }
  }
}
}

#============================================================================
# Keyword MTRIX
#
# Currently using the non-mmCIF definitions..
#   _atom_sites_non_cryst_transf.matrix_id
#   _atom_sites_non_cryst_transf.matrix[1][1] .. [3][3]
#   _atom_sites_non_cryst_transf.vector[1] .. _3
# This will need to be revisited.
# NB The Guide to Authors is wrong for MTRIX row, it goes 11-55
#
# Each MTRIXn record contributes one matrix row plus its vector element.
# Output is buffered in mat_save[]; the banner and loop_ header are queued
# once, and the "id given" line is queued on each MTRIX1 record.
{
if ($1 == "MTRIX1" || $1 == "MTRIX2" || $1 == "MTRIX3") {
  mtrix_col1 = substr ( ($0), 11, 10)
  mtrix_col2 = substr ( ($0), 21, 10)
  mtrix_col3 = substr ( ($0), 31, 10)
  mtrix_col4 = substr ( ($0), 46, 10)
  # print loop headers
  if (mtrix_flag == "0") {
    mat_save[++mtrix_flag] = "\n\n\n"
    mat_save[++mtrix_flag] = "##############################\n"
    mat_save[++mtrix_flag] = "# #\n"
    mat_save[++mtrix_flag] = "# ATOM_SITES_NON_CRYST_TRANSF#\n"
    mat_save[++mtrix_flag] = "# #\n"
    mat_save[++mtrix_flag] = "##############################\n"
    mat_save[++mtrix_flag] = "\n"
    mat_save[++mtrix_flag] = "##### WARNING ################\n"
    mat_save[++mtrix_flag] = "# DOMAIN INFORMATION NEEDED #\n"
    mat_save[++mtrix_flag] = "##############################\n\n"
    mat_save[++mtrix_flag] = "\nloop_ \n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix_id\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.result_given\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[1][1]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[1][2]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[1][3]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.vector[1] \n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[2][1]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[2][2]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[2][3]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.vector[2] \n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[3][1]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[3][2]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.matrix[3][3]\n"
    mat_save[++mtrix_flag] = "_atom_sites_non_cryst_transf.vector[3] \n"
  }
  mtrix_id = substr( ($0), 8, 3)
  mtrix_given = substr( ($0),60, 1)
  x_given = "no "
  # "yes" only when column 60 holds a non-blank character.  A record
  # trimmed before column 60 yields "" here, which must count as "no"
  # (the old test, != " ", reported such records as "yes").
  if (mtrix_given ~ /[^ ]/) x_given = "yes"
  if ($1 == "MTRIX1") {
    mat_save[++mtrix_flag]=sprintf ("%3s %3s\n", mtrix_id, x_given)
  }
  mat_save[++mtrix_flag] = (mtrix_col1 " " \
    mtrix_col2 " " \
    mtrix_col3 " " \
    mtrix_col4 "\n")
}
}

#============================================================================
# Keyword ORIGX
#
#   _database_pdb_matrix.origx[1][1] .. [3][3]
#   _database_pdb_matrix.origx_vector[1] .. _3
#
# Each ORIGXn record contributes one row; output is buffered in om_save[].
{
if ($1 == "ORIGX1" || $1 == "ORIGX2" || $1 == "ORIGX3") {
  origx_col1 = substr ( ($0), 11, 10)
  origx_col2 = substr ( ($0), 21, 10)
  origx_col3 = substr ( ($0), 31, 10)
  origx_col4 = substr ( ($0), 46, 10)
  # print loop headers
  if (origx_flag == "0") {
    om_save[++origx_flag] = "\n\n\n"
    om_save[++origx_flag] = "\nloop_ \n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[1][1]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[1][2]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[1][3]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx_vector[1] \n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[2][1]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[2][2]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[2][3]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx_vector[2] \n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[3][1]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[3][2]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx[3][3]\n"
    om_save[++origx_flag] = "_database_pdb_matrix.origx_vector[3] \n"
  }
  origx_id = substr( ($0), 8, 3)
  om_save[++origx_flag] = (origx_col1 " " \
    origx_col2 " " \
    origx_col3 " " \
    origx_col4 "\n" )
}
}

#===========================================================================
# Keyword REMARK
#
{
if ($1 == "REMARK") {
  ++all_remarks
  # parse record
  remark_number = substr ( ($0), 8, 3)
  remark_cont = substr ( ($0),17, 2)
  jrnl_rec_type = substr ( ($0),13, 4)
  remark_text = substr ( ($0),12, 60)
  remark_cit_text = substr ( ($0),20, 51)
  if (convtext == "yes") \
    remark_cit_text=\
    typeset(remark_cit_text)
  remark_test = substr ( ($0),12, 3)
  # Deal with
change of remark number remark_test = substr ( ($0), 12, 3) if (remark_number_old != remark_number) { remark_flag = "0" remark_number_old = remark_number if (remark_number != " 3" && \ remark_number != " 2" && \ remark_number != " 1") { printf (";\n\n") } } # type 1 remarks - additional references # data items identical to JRNL if (remark_number == " 1" && remark_test == "REF") ++cit_flag if (remark_number == " 1" && remark_test != "REF" \ && remark_test != " ") { # Assign TITL records if (jrnl_rec_type == "TITL" && remark_cont == " ") \ cit_title_1[cit_flag] = remark_cit_text if (jrnl_rec_type == "TITL" && remark_cont == " 2") \ cit_title_2[cit_flag] = remark_cit_text if (jrnl_rec_type == "TITL" && remark_cont == " 3") \ cit_title_3[cit_flag] = remark_cit_text if (jrnl_rec_type == "TITL" && remark_cont == " 4") \ cit_title_4[cit_flag] = remark_cit_text if (jrnl_rec_type == "TITL" && remark_cont == " 5") \ cit_title_5[cit_flag] = remark_cit_text if (jrnl_rec_type == "TITL" && remark_cont == " 6") \ cit_title_6[cit_flag] = remark_text # Assign AUTH records if (jrnl_rec_type == "AUTH" && remark_cont == " ") cit_auth_1[cit_flag] = remark_cit_text if (jrnl_rec_type == "AUTH" && remark_cont == " 2") cit_auth_2[cit_flag] = remark_cit_text if (jrnl_rec_type == "AUTH" && remark_cont == " 3") cit_auth_3[cit_flag] = remark_cit_text if (jrnl_rec_type == "AUTH" && remark_cont == " 4") cit_auth_4[cit_flag] = remark_cit_text # Assign EDIT records if (jrnl_rec_type == "EDIT" && remark_cont == " ") cit_edit_1[cit_flag] = remark_cit_text if (jrnl_rec_type == "EDIT" && remark_cont == " 2") cit_edit_2[cit_flag] = remark_cit_text if (jrnl_rec_type == "EDIT" && remark_cont == " 3") cit_edit_3[cit_flag] = remark_cit_text if (jrnl_rec_type == "EDIT" && remark_cont == " 4") cit_edit_4[cit_flag] = remark_cit_text # Assign REF records if (jrnl_rec_type == "REF " && remark_cont == " ") { jour_1[cit_flag] = substr ( ($0), 20, 28) volu[cit_flag] = substr ( ($0), 53, 3) page[cit_flag] = substr 
( ($0), 57, 5) year[cit_flag] = substr ( ($0), 63, 4) jrnl_pub_pub_1[cit_flag] = "?" } if (jrnl_rec_type == "REF " && remark_cont == " 2") { jour_2[cit_flag] = substr ( ($0), 20, 28) } if (jrnl_rec_type == "REF " && remark_cont == " 3") { jour_3[cit_flag] = substr ( ($0), 20, 28) } # Assign PUBL records if (jrnl_rec_type == "PUBL" && remark_cont == " ") { jrnl_pub_pub_1[cit_flag] = substr ( ($0), 20, 51) } if (jrnl_rec_type == "PUBL" && remark_cont == " 2") { jrnl_pub_pub_2[cit_flag] = substr ( ($0), 20, 51) } if (jrnl_rec_type == "PUBL" && remark_cont == " 3") { jrnl_pub_pub_3[cit_flag] = substr ( ($0), 20, 51) } if (jrnl_rec_type == "REFN") { astm[cit_flag] = substr ( ($0), 25, 6) country[cit_flag] = substr ( ($0), 33, 2) issn[cit_flag] = substr ( ($0), 41, 25) } ++remark_flag } # # type 2 remarks - resolution # if (remark_number == " 2" && $3 == "RESOLUTION.") { # # print all citations from JNRL and REMARK 1 records # if (jrnl_flag == "1") { printf ("\nloop_\n") printf ("_citation.id\n") printf ("_citation.coordinate_linkage\n") printf ("_citation.title\n") printf ("_citation.country\n") printf ("_citation.journal_abbrev\n") printf ("_citation.journal_volume\n") printf ("_citation.journal_issue\n") printf ("_citation.page_first\n") printf ("_citation.year\n") printf ("_citation.journal_coden_ASTM\n") printf ("_citation.journal_coden_ISSN\n") printf ("_citation.journal_coden_PDB\n") printf ("_citation.book_title\n") printf ("_citation.book_publisher\n") printf ("_citation.book_coden_ISBN\n") printf ("_citation.details\n") ++jrnl_flag } for (i=1; i <= cit_flag; ++i) { if (i == "1" && primary) { printf (" \nprimary yes\n") } else { printf (" \n%3s no\n", i) } # for books # _citation.title == TITL (if present) # _citation.country == country[i] # _citation.journal_abbrev == ? # _citation.journal_volume == volu[i] # _citation.journal_issue == ? # _citation.page_first == ? # _citation.year == year[i] # _citation.journal_coden_ASTM == ? 
# _citation.journal_coden_ISSN == ? # _citation.journal_coden_PDB == ? # _citation.book_title == REF (jour_x) # _citation.book_publisher == jrnl_pub_pub_x[i] # _citation.book_coden_ISBN == REFN ?? # _citation.details == ? if (jrnl_pub_pub_1[i] != "?") { if (country[i] == " ") country[i] = "?" if (jour_1[i] == " ") \ jour_1[i] = "?" if (volu[i] == " ") volu[i] = "?" if (year[i] == " ") year[i] = "?" if (page[i] == " ") page[i] = "?" if (cit_title_1[i]) printf ("; %s\n", cit_title_1[i]) else printf (" ? ") if (cit_title_2[i]) printf (" %s\n", cit_title_2[i]) if (cit_title_3[i]) printf (" %s\n", cit_title_3[i]) if (cit_title_4[i]) printf (" %50s\n", cit_title_4[i]) if (cit_title_5[i]) printf (" %50s\n", cit_title_5[i]) if (cit_title_1[i]) printf (";\n") printf (" %2s ? %3s ? %5s %4s ? ? ?\n" \ ,country[i],volu[i], page[i], year[i]) if (!jour_2[i]) { printf (" '%28s' \n", jour_1[i]) } if(jour_2[i] && !jour_3[i]) { printf (" '%28s %-28s' \n",jour_1[i],jour_2[i])} if(jour_3[i]) { printf (" ; %28s %28s\n %-28s\n;\n", \ jour_1[i], jour_2[i], jour_3[i])} if (jrnl_pub_pub_1[i]) printf (" ; %51s \n", jrnl_pub_pub_1[i]) if (jrnl_pub_pub_2[i]) printf (" %51s \n", jrnl_pub_pub_2[i]) if (jrnl_pub_pub_3[i]) printf (" %51s \n", jrnl_pub_pub_3[i]) if (jrnl_pub_pub_1[i]) printf (";\n") printf (" '%25s' ? \n", issn[i]) } else { # for journals if (cit_title_1[i]) printf ("; %50s\n", cit_title_1[i]) if (cit_title_2[i]) printf (" %50s\n", cit_title_2[i]) if (cit_title_3[i]) printf (" %50s\n", cit_title_3[i]) if (cit_title_4[i]) printf (" %50s\n", cit_title_4[i]) if (cit_title_5[i]) printf (" %50s\n", cit_title_5[i]) printf (";\n %2s '%28s' %3s ? %5s %4s \n'%-15s' '%15s' ? ? ? ? 
?\n",\ country[i], jour_1[i], volu[i], page[i], year[i], astm[i], issn[i]) } } # Loop Editor List for (i=1; i <= cit_flag; ++i) { if (cit_edit_1[i]) { printf ("\nloop_\n") printf ("_citation_editor.citation_id\n") printf ("_citation_editor.name\n") break } } for (i=1; i <= cit_flag; ++i) { if (cit_edit_1[i]) { num_edit = split( cit_edit_1[i], editors, "," ) for (ii=1; ii <= num_edit; ++ii){ num_e_split = split( editors[ii], e_split, " ") editors[ii] = "" if (num_e_split > 0) { editors[ii] = e_split[1] for (j=2; j <= num_e_split; ++j) { editors[ii] = (editors[ii] " " e_split[j]) } } } if (cit_edit_2[i]) (num_edit = num_edit-1) for (j=1; j<= num_edit; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", editors[j]) else printf (" %3s '%s' \n", i, editors[j]) } } if (cit_edit_2[i]) { num_edit = split( cit_edit_2[i], editors, "," ) for (ii=1; ii <= num_edit; ++ii){ num_e_split = split( editors[ii], e_split, " ") editors[ii] = "" if (num_e_split > 0) { editors[ii] = e_split[1] for (j=2; j <= num_e_split; ++j) { editors[ii] = (editors[ii] " " e_split[j]) } } } if (cit_edit_3[i]) (num_edit = num_edit-1) for (j=1; j<= num_edit; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", editors[j]) else printf (" %3s '%s' \n", i, editors[j]) } } if (cit_edit_3[i]) { num_edit = split( cit_edit_3[i], editors, "," ) for (ii=1; ii <= num_edit; ++ii){ num_e_split = split( editors[ii], e_split, " ") editors[ii] = "" if (num_e_split > 0) { editors[ii] = e_split[1] for (j=2; j <= num_e_split; ++j) { editors[ii] = (editors[ii] " " e_split[j]) } } } if (cit_edit_4[i]) (num_edit = num_edit-1) for (j=1; j<= num_edit; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", editors[j]) else printf (" %3s '%s' \n", i, editors[j]) } } if (cit_edit_4[i]) { num_edit = split( cit_edit_4[i], editors, "," ) for (ii=1; ii <= num_edit; ++ii){ num_e_split = split( editors[ii], e_split, " ") editors[ii] = "" if (num_e_split > 0) { editors[ii] = e_split[1] for (j=2; j <= 
num_e_split; ++j) { editors[ii] = (editors[ii] " " e_split[j]) } } } for (j=1; j<= num_edit; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", editors[j]) else printf (" %3s '%s' \n", i, editors[j]) } } } # Loop Author List for (i=1; i <= cit_flag; ++i) { if (cit_auth_1[i]) { printf ("\nloop_\n") printf ("_citation_author.citation_id\n") printf ("_citation_author.name\n") break } } for (i=1; i <= cit_flag; ++i) { if (cit_auth_1[i]) { num_auth = split( cit_auth_1[i], authors, "," ) for (ii=1; ii <= num_auth; ++ii){ num_a_split = split( authors[ii], a_split, " ") authors[ii] = "" if (num_a_split > 0) { authors[ii] = a_split[1] for (j=2; j <= num_a_split; ++j) { authors[ii] = (authors[ii] " " a_split[j]) } } } if (cit_auth_2[i]) (num_auth = num_auth-1) for (j=1; j<= num_auth; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", authors[j]) else printf (" %3s '%s' \n", i, authors[j]) } } if (cit_auth_2[i]) { num_auth = split( cit_auth_2[i], authors, "," ) for (ii=1; ii <= num_auth; ++ii){ num_a_split = split( authors[ii], a_split, " ") authors[ii] = "" if (num_a_split > 0) { authors[ii] = a_split[1] for (j=2; j <= num_a_split; ++j) { authors[ii] = (authors[ii] " " a_split[j]) } } } if (cit_auth_3[i]) (num_auth = num_auth-1) for (j=1; j<= num_auth; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", authors[j]) else printf (" %3s '%s' \n", i, authors[j]) } } if (cit_auth_3[i]) { num_auth = split( cit_auth_3[i], authors, "," ) for (ii=1; ii <= num_auth; ++ii){ num_a_split = split( authors[ii], a_split, " ") authors[ii] = "" if (num_a_split > 0) { authors[ii] = a_split[1] for (j=2; j <= num_a_split; ++j) { authors[ii] = (authors[ii] " " a_split[j]) } } } if (cit_auth_4[i]) (num_auth = num_auth-1) for (j=1; j<= num_auth; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", authors[j]) else printf (" %3s '%s' \n", i, authors[j]) } } if (cit_auth_4[i]) { num_auth = split( cit_auth_4[i], authors, "," ) for (ii=1; ii <= 
num_auth; ++ii){ num_a_split = split( authors[ii], a_split, " ") authors[ii] = "" if (num_a_split > 0) { authors[ii] = a_split[1] for (j=2; j <= num_a_split; ++j) { authors[ii] = (authors[ii] " " a_split[j]) } } } for (j=1; j<= num_auth; ++j ) { if ( (primary) && i == "1" ) printf (" primary '%s' \n", authors[j]) else printf (" %3s '%s' \n", i, authors[j]) } } } resolution = substr ( ($0), 23, 45) num_split = split( resolution, res_split, " ") if ( res_split[1] == "NOT" ) res_split = "." printf ("\n_reflns.d_resolution_high %8.2g \n", res_split[1] ) ++remark_flag # Include _exptl templates if (verbose) { printf ("_exptl.absorpt_coefficient_mu ? \n") printf ("_exptl.absorpt_correction_T_max ? \n") printf ("_exptl.absorpt_correction_type ? \n") printf ("_exptl.absorpt_process_details ? \n\n") printf ("_exptl_crystal.colour ? \n") printf ("_exptl_crystal.density_diffrn ? \n") printf ("_exptl_crystal.density_meas ? \n") printf ("_exptl_crystal.density_meas_temp ? \n") printf ("_exptl_crystal.density_method ? \n") printf ("_exptl_crystal.description ? \n") printf ("_exptl_crystal.F_000 ? \n") printf ("_exptl_crystal_face.diffr_chi ? \n") printf ("_exptl_crystal_face.diffr_kappa ? \n") printf ("_exptl_crystal_face.diffr_phi ? \n") printf ("_exptl_crystal_face.diffr_psi ? \n") printf ("_exptl_crystal_face.index_h ? \n") printf ("_exptl_crystal_face.index_k ? \n") printf ("_exptl_crystal_face.index_l ? \n") printf ("_exptl_crystal_face.perp_dist ? \n") printf ("_exptl_crystal.id ? \n") printf ("_exptl_crystal.preparation ? \n") printf ("_exptl_crystal.size_max ? \n") printf ("_exptl_crystal.size_mid ? \n") printf ("_exptl_crystal.size_min ? \n") printf ("_exptl_crystal.size_rad ? \n") printf ("_exptl.crystals_number ? \n") printf ("_exptl_crystal_grow.apparatus ? \n") printf ("_exptl_crystal_grow.atmosphere ? \n") printf ("_exptl_crystal_grow.crystal_id ? \n") printf ("_exptl_crystal_grow.details ? \n") printf ("_exptl_crystal_grow.method ? 
\n") printf ("_exptl_crystal_grow.method_ref ? \n") printf ("_exptl_crystal_grow.pH ? \n") printf ("_exptl_crystal_grow.pressure ? \n") printf ("_exptl_crystal_grow.seeding ? \n") printf ("_exptl_crystal_grow.seeding_ref ? \n") printf ("_exptl_crystal_grow.temp ? \n") printf ("_exptl_crystal_grow.time ? \n") printf ("\nloop_\n") printf ("_exptl_crystal_grow_comp.crystal_id \n") printf ("_exptl_crystal_grow_comp.id \n") printf ("_exptl_crystal_grow_comp.conc \n") printf ("_exptl_crystal_grow_comp.details \n") printf ("_exptl_crystal_grow_comp.name \n") printf ("_exptl_crystal_grow_comp.sol_id \n") printf ("_exptl_crystal_grow_comp.volume \n") printf (" ? ? ? ? ? ? ? \n\n") } # Include additional data items to be added on diffraction experiment. # A rigourous treatment of REMARK 3 might be able to parse some of # this info. if (verbose) { printf ("_diffrn.ambient_temp ? \n") printf ("_diffrn.ambient_pressure ? \n") printf ("_diffrn_attenuator.code ? \n") printf ("_diffrn_attenuator.scale ? \n") printf ("_diffrn.details ? \n\n") printf ("_diffrn.ambient_environment ? \n") printf ("_diffrn.crystal_support ? \n") printf ("_diffrn.crystal_treatment ? \n\n") printf ("_diffrn_measurement.method ? \n") printf ("_diffrn_measurement.details ? \n") printf ("_diffrn_measurement.device ? \n") printf ("_diffrn_measurement.device_details ? \n") printf ("_diffrn_measurement.device_specific ? \n") printf ("_diffrn_measurement.device_type ? \n") printf ("_diffrn_orient_matrix.type ? \n") printf ("_diffrn_orient_matrix.UB[1][1] ? \n") printf ("_diffrn_orient_matrix.UB[1][2] ? \n") printf ("_diffrn_orient_matrix.UB[1][3] ? \n") printf ("_diffrn_orient_matrix.UB[2][1] ? \n") printf ("_diffrn_orient_matrix.UB[2][2] ? \n") printf ("_diffrn_orient_matrix.UB[2][3] ? \n") printf ("_diffrn_orient_matrix.UB[3][1] ? \n") printf ("_diffrn_orient_matrix.UB[3][2] ? \n") printf ("_diffrn_orient_matrix.UB[3][3] ? 
\n\n") printf ("loop_\n") printf ("_diffrn_orient_refln.index_h\n") printf ("_diffrn_orient_refln.index_k\n") printf ("_diffrn_orient_refln.index_l\n") printf ("_diffrn_orient_refln.angle_chi\n") printf ("_diffrn_orient_refln.angle_kappa\n") printf ("_diffrn_orient_refln.angle_phi\n") printf ("_diffrn_orient_refln.angle_psi\n") printf (" ? ? ? ? ? ? ?\n\n") printf ("_diffrn_radiation.filter_edge ? \n") printf ("_diffrn_radiation.inhomogeneity ? \n") printf ("_diffrn_radiation.monochromator ? \n") printf ("_diffrn_radiation.polarisn_norm ? \n") printf ("_diffrn_radiation.polarisn_ratio ? \n") printf ("_diffrn_radiation.collimation ? \n") printf ("_diffrn_radiation.type ? \n\n") printf ("loop_\n") printf ("_diffrn_radiation.wavelength \n") printf ("_diffrn_radiation.wavelength_id \n") printf ("_diffrn_radiation.wavelength_wt \n") printf (" ? ? ? \n\n") printf ("_diffrn_radiation.detector ? \n") printf ("_diffrn_radiation.detector_dtime ? \n") printf ("_diffrn_radiation.detector_details ? \n") printf ("_diffrn_radiation.detector_type ? \n\n") printf ("_diffrn_radiation.source ? \n") printf ("_diffrn_radiation.source_details ? \n") printf ("_diffrn_radiation.source_power ? \n") printf ("_diffrn_radiation.source_specific ? \n") printf ("_diffrn_radiation.source_target ? \n") printf ("_diffrn_radiation.source_type ? 
\n\n") printf ("loop_\n") printf ("_diffrn_refln.index_h \n") printf ("_diffrn_refln.index_k \n") printf ("_diffrn_refln.index_l \n") printf ("_diffrn_refln.angle_chi \n") printf ("_diffrn_refln.angle_kappa \n") printf ("_diffrn_refln.angle_omega \n") printf ("_diffrn_refln.angle_phi \n") printf ("_diffrn_refln.angle_psi \n") printf ("_diffrn_refln.angle_theta \n") printf ("_diffrn_refln.attenuator_code \n") printf ("_diffrn_refln.counts_bg_1 \n") printf ("_diffrn_refln.counts_bg_2 \n") printf ("_diffrn_refln.counts_net \n") printf ("_diffrn_refln.counts_peak \n") printf ("_diffrn_refln.counts_total \n") printf ("_diffrn_refln.crystal_id \n") printf ("_diffrn_refln.detect_slit_horiz \n") printf ("_diffrn_refln.detect_slit_vert \n") printf ("_diffrn_refln.elapsed_time \n") printf ("_diffrn_refln.intensity_net \n") printf ("_diffrn_refln.intensity_sigma \n") printf ("_diffrn_refln.scale_group_code \n") printf ("_diffrn_refln.scan_mode \n") printf ("_diffrn_refln.scan_mode_backgd \n") printf ("_diffrn_refln.scan_width \n") printf ("_diffrn_refln.sint_over_lambda \n") printf ("_diffrn_refln.standard_code\n") printf ("_diffrn_refln.wavelength \n") printf ("_diffrn_refln.wavelength_id \n") printf \ (" ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n\n") printf ("_diffrn_reflns.av_R_equivalents ? \n") printf ("_diffrn_reflns.av_sigmaI/netI ? \n") printf ("_diffrn_reflns.limit_h_max ? \n") printf ("_diffrn_reflns.limit_h_min ? \n") printf ("_diffrn_reflns.limit_k_max ? \n") printf ("_diffrn_reflns.limit_k_min ? \n") printf ("_diffrn_reflns.limit_l_max ? \n") printf ("_diffrn_reflns.limit_l_min ? \n") printf ("_diffrn_reflns.number ? \n") printf ("_diffrn_reflns.reduction_process ? \n") printf ("_diffrn_reflns.theta_max ? \n") printf ("_diffrn_reflns.theta_min ? \n") printf ("_diffrn_reflns.transf_matrix[1][1] ? \n") printf ("_diffrn_reflns.transf_matrix[1][2] ? \n") printf ("_diffrn_reflns.transf_matrix[1][3] ? \n") printf ("_diffrn_reflns.transf_matrix[2][1] ? 
\n") printf ("_diffrn_reflns.transf_matrix[2][2] ? \n") printf ("_diffrn_reflns.transf_matrix[2][3] ? \n") printf ("_diffrn_reflns.transf_matrix[3][1] ? \n") printf ("_diffrn_reflns.transf_matrix[3][2] ? \n") printf ("_diffrn_reflns.transf_matrix[3][3] ? \n\n") printf ("loop_\n") printf ("_diffrn_scale_group.code \n") printf ("_diffrn_scale_group.I_net \n") printf (" ? ? \n\n") printf ("loop_\n") printf ("_diffrn_standard_refln.index_h \n") printf ("_diffrn_standard_refln.index_k \n") printf ("_diffrn_standard_refln.index_l \n") printf ("_diffrn_standard_refln.code \n") printf (" ? ? ? ? \n\n") printf ("_diffrn_standards.decay_% ? \n") printf ("_diffrn_standards.interval_count ? \n") printf ("_diffrn_standards.interval_time ? \n") printf ("_diffrn_standards.number ? \n") printf ("_diffrn_standards.scale_sigma ? \n") printf ("\nloop_\n") printf ("_refln.index_h \n") printf ("_refln.index_k \n") printf ("_refln.index_l \n") printf ("_refln.A_meas \n") printf ("_refln.A_calc \n") printf ("_refln.B_meas \n") printf ("_refln.B_calc \n") printf ("_refln.crystal_id \n") printf ("_refln.F_meas \n") printf ("_refln.F_calc \n") printf ("_refln.F_sigma \n") printf ("_refln.F_squared_meas \n") printf ("_refln.F_squared_calc \n") printf ("_refln.F_squared_sigma \n") printf ("_refln.intensity_meas \n") printf ("_refln.intensity_calc \n") printf ("_refln.intensity_sigma \n") printf ("_refln.mean_path_length_tbar \n") printf ("_refln.status \n") printf ("_refln.phase_meas \n") printf ("_refln.phase_calc \n") printf ("_refln.refinement_status \n") printf ("_refln.scale_group_code \n") printf ("_refln.sint_over_lambda \n") printf ("_refln.symmetry_epsilon \n") printf ("_refln.symmetry_multiplicity \n") printf ("_refln.wavelength \n") printf ("_refln.wavelength_id \n") printf\ (" ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n\n") printf ("_reflns.d_resolution_high ? \n") printf ("_reflns.d_resolution_low ? \n") printf ("_reflns.limit_h_max ? 
\n") printf ("_reflns.limit_h_min ? \n") printf ("_reflns.limit_k_max ? \n") printf ("_reflns.limit_k_min ? \n") printf ("_reflns.limit_l_max ? \n") printf ("_reflns.limit_l_min ? \n") printf ("_reflns.number_all ? \n") printf ("_reflns.number_obs ? \n") printf ("_reflns.observed_criterion ? \n") printf ("\nloop_\n") printf ("_reflns_scale.group_code \n") printf ("_reflns_scale.meas_F \n") printf ("_reflns_scale.meas_F_squared \n") printf ("_reflns_scale.meas_intensity \n") printf (" ? ? ? ? \n\n") printf ("_reflns.details ? \n") printf ("_reflns_data_reduction_method ? \n\n") printf ("\nloop_\n") printf ("_reflns_shell.d_res_high \n") printf ("_reflns_shell.d_res_low \n") printf ("_reflns_shell.count_measured_all \n") printf ("_reflns_shell.count_measured_obs \n") printf ("_reflns_shell.count_possible \n") printf ("_reflns_shell.count_unique_all \n") printf ("_reflns_shell.count_unique_obs \n") printf ("_reflns_shell.meanI/sigI_all \n") printf ("_reflns_shell.meanI/sigI_obs \n") printf ("_reflns_shell.possible_%%_all \n") printf ("_reflns_shell.possible_%%_obs \n") printf ("_reflns_shell.Rmerge_F_all \n") printf ("_reflns_shell.Rmerge_I_all \n") printf ("_reflns_shell.Rmerge_I_obs \n") printf (" ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n\n") printf ("_phasing_averaging.details ? \n") printf ("_phasing_averaging.method ? \n") printf ("_phasing_isomorphous.details ? \n") printf ("_phasing_isomorphous.method ? \n") printf ("_phasing_isomorphous.parent ? \n") printf ("_phasing_MAD.details ? \n") printf ("_phasing_MAD.method ? \n") printf ("_phasing_MIR.details ? \n") printf ("_phasing_MIR.method ? \n") printf ("\nloop_\n") printf ("_phasing_MIR_der.id \n") printf ("_phasing_MIR_der.number_of_sites \n") printf ("_phasing_MIR_der.details \n") printf ("_phasing_MIR_der.reflns_criteria \n") printf (" ? ? ? ? 
\n") printf ("\nloop_\n") printf ("_phasing_MIR_der_shell.der_id \n") printf ("_phasing_MIR_der_shell.d_res_low \n") printf ("_phasing_MIR_der_shell.d_res_high \n") printf ("_phasing_MIR_der_shell.fom \n") printf ("_phasing_MIR_der_shell.ha_ampl \n") printf ("_phasing_MIR_der_shell.loc \n") printf ("_phasing_MIR_der_shell.phase \n") printf ("_phasing_MIR_der_shell.power \n") printf ("_phasing_MIR_der_shell.R_Cullis \n") printf ("_phasing_MIR_der_shell.R_Kraut \n") printf ("_phasing_MIR_der_shell.reflns \n") printf (" ? ? ? ? ? ? ? ? ? ? ?\n") printf ("\nloop_\n") printf ("_phasing_MIR_shell.d_res_high \n") printf ("_phasing_MIR_shell.d_res_low \n") printf ("_phasing_MIR_shell.fom \n") printf ("_phasing_MIR_shell.loc \n") printf ("_phasing_MIR_shell.mean_phase \n") printf ("_phasing_MIR_shell.power \n") printf ("_phasing_MIR_shell.R_Cullis \n") printf ("_phasing_MIR_shell.R_Kraut \n") printf ("_phasing_MIR_shell.reflns \n") printf (" ? ? ? ? ? ? ? ? ? \n") printf ("\nloop_\n") printf ("_phasing_MIR_der_site.der_id \n") printf ("_phasing_MIR_der_site.id \n") printf ("_phasing_MIR_der_site.B_iso \n") printf ("_phasing_MIR_der_site.Cartn_x \n") printf ("_phasing_MIR_der_site.Cartn_y \n") printf ("_phasing_MIR_der_site.Cartn_z \n") printf ("_phasing_MIR_der_site.fract_x \n") printf ("_phasing_MIR_der_site.fract_y \n") printf ("_phasing_MIR_der_site.fract_z \n") printf ("_phasing_MIR_der_site.occupancy \n") # printf ("_phasing_MIR_der_site.details \n") printf (" ? ? ? ? ? ? ? ? ? ? \n\n") } } # # type 3 remarks - refinmement details # if (remark_number == " 3") { if (remark_flag == "0") { if (verbose) { printf ("_refine.diff_density_max ? \n") printf ("_refine.diff_density_min ? \n") printf ("_refine.ls_abs_structure_details ? \n") printf ("_refine.ls_abs_structure_Flack ? \n") printf ("_refine.ls_abs_structure_Rogers ? \n") printf ("_refine.ls_extinction_coef ? \n") printf ("_refine.ls_extinction_method ? \n") printf ("_refine.ls_goodness_of_fit_all ? 
\n") printf ("_refine.ls_goodness_of_fit_obs ? \n") printf ("_refine.ls_hydrogen_treatment ? \n") printf ("_refine.ls_matrix_type ? \n") printf ("_refine.ls_number_constraints ? \n") printf ("_refine.ls_number_parameters ? \n") printf ("_refine.ls_number_reflns_obs ? \n") printf ("_refine.ls_number_restraints ? \n") printf ("_refine.ls_R_factor_all ? \n") printf ("_refine.ls_R_factor_obs ? \n") printf ("_refine.ls_restrained_S_all ? \n") printf ("_refine.ls_restrained_S_obs ? \n") printf ("_refine.ls_shift_over_esd_max ? \n") printf ("_refine.ls_shift_over_esd_max ? \n") printf ("_refine.ls_structure_factor_coef ? \n") # printf ("_refine.ls_weighting_details ? \n") printf ("_refine.ls_weighting_scheme ? \n") printf ("_refine.ls_wR_factor_all ? \n") printf ("_refine.ls_wR_factor_obs ? \n") printf ("_refine.details ? \n") printf ("_refine.occupancy_max ? \n") printf ("_refine.occupancy_min ? \n") printf ("_refine.B_iso_max ? \n") printf ("_refine.B_iso_min ? \n") printf ("_refine_ls_restr.criterion ? \n") printf ("_refine_ls_restr.dev_ideal ? \n") printf ("_refine_ls_restr.number ? \n") printf ("_refine_ls_restr.rejects ? \n") printf ("_refine_ls_restr.dev_ideal_target ? \n") printf ("_refine_ls_restr.type ? \n") printf ("\nloop_\n") printf ("_refine_ls_shell.d_res_high \n") printf ("_refine_ls_shell.d_res_low \n") printf ("_refine_ls_shell.reflns \n") printf ("_refine_ls_shell.R_factor_all \n") printf ("_refine_ls_shell.R_factor_obs \n") printf ("_refine_ls_shell.wR_factor_all \n") printf ("_refine_ls_shell.wR_factor_obs \n") printf (" ? ? ? ? ? ? ?\n") printf ("\nloop_\n") printf ("_refine_occupancy_class \n") printf ("_refine_occupancy_details \n") printf ("_refine_occupancy_treatment \n") printf ("_refine_occupancy_value \n") printf (" ? ? ? ? \n\n") printf ("\nloop_\n") printf ("_refine_iso_B_class \n") printf ("_refine_iso_B_details \n") printf ("_refine_iso_B_treatment \n") printf ("_refine_iso_B_value \n") printf (" ? ? ? ? 
\n")
        }
      }
    }
#
# type 3-x remarks
    if (remark_number != " 1" && remark_number != " 2") {
      if (remark_header_flag == "0") {
        printf ("\nloop_\n")
        printf ("_database_PDB_remark.id\n")
        printf ("_database_PDB_remark.text\n")
        ++remark_header_flag
      }
      if (remark_flag == "0") {
        printf ("%3s\n;", remark_number )
      }
      printf (" %-60s \n", remark_text)
      ++remark_flag
      ++flag
    }
  }
}
#===========================================================================
# Keyword REVDAT
#
# One entry per REVDAT line is stored under the index revdat_flag, which
# is advanced at the end of the rule.  Columns are those actually read by
# the substr() calls below.
#
#   rev_mod_number [8-10]  = _database_PDB_rev.num
#                          = _database_PDB_rev_record.rev_num
#   rev_cont_flag  [11]    = [continuation flag]
#   rev_date       [14-22] = _database_PDB_rev.date
#                            date in _audit.update_record
#   rev_name       [24-28] = _database_PDB_rev_record.details
#                            PDB revision name in _audit.update_record
#   rev_type       [32]    = _database_PDB_rev.mod_type
#   rev_rec_corr   [40-70] = _database_PDB_rev_record.type
#
# Also maintained here:
#   rev_mod_of_date[date]  numeric modification number recorded for a date
#   aud_rev_id             revision name of the first REVDAT line seen
#   audit_point[]          index of each REVDAT line whose continuation
#                          flag is blank
#   rev_date_year/_mon/_day  date pieces looked up through the yyyy[] and
#                          mmm2mm[] tables (defined outside this section)
#
$1 == "REVDAT" {

  # fixed-column fields
  rev_mod_number[revdat_flag] = substr($0,  8,  3)
  rev_cont_flag[revdat_flag]  = substr($0, 11,  1)
  rev_date[revdat_flag]       = substr($0, 14,  9)
  rev_name[revdat_flag]       = substr($0, 24,  5)
  rev_type[revdat_flag]       = substr($0, 32,  1)
  rev_rec_corr[revdat_flag]   = substr($0, 40, 31)

  # a blank date inherits the date of the previous REVDAT line
  if (rev_date[revdat_flag] == " ")
    rev_date[revdat_flag] = rev_date[revdat_flag - 1]

  rev_mod_of_date[rev_date[revdat_flag]] = rev_mod_number[revdat_flag] + 0

  # The latest revision comes first; its name becomes _audit.revision_id
  if (revdat_flag == 1)
    aud_rev_id = rev_name[revdat_flag]

  # first REVDAT of a block: remember it for audit.update_record
  if (rev_cont_flag[revdat_flag] == " ")
    audit_point[++audit_flag] = revdat_flag

  # change date format: split dd-mmm-yy and translate each piece
  cal_date = split(rev_date[revdat_flag], dmy, "-")
  rev_date_day[revdat_flag]  = dmy[1]
  rev_date_mon[revdat_flag]  = mmm2mm[dmy[2]]
  rev_date_year[revdat_flag] = yyyy[dmy[3] + 0]

  revdat_flag++
}
#===========================================================================
# Keyword SPRSDE and OBSLTE
#
# s_o_cont [9-10] = [continuation flag]
# s_o_date
#                [12-20] = _database_PDB_rev.date
#                          used to find _database_PDB_rev.num
#   s_o_name     [22-25] = _database_PDB_rev_record.details
#   s_o_list     [32-70] = _database_PDB_rev.replaces (SPRSDE)
#                        = _database_PDB_rev.replaced_by (OBSLTE)
#
# Each SPRSDE/OBSLTE line is stored under the next value of s_o_flag,
# together with its record type in s_o_type[].
#
$1 == "SPRSDE" || $1 == "OBSLTE" {
  ++s_o_flag
  s_o_type[s_o_flag] = $1
  s_o_cont[s_o_flag] = substr($0,  9,  2)
  s_o_date[s_o_flag] = substr($0, 12,  9)
  s_o_name[s_o_flag] = substr($0, 22,  4)
  s_o_list[s_o_flag] = substr($0, 32, 39)
}
#===========================================================================
# Keyword SCALE
# CIF provides data items to convert from orthogonal to fractional
# coordinates.
#
# Output is not printed here; the text is queued in sc_save[], indexed by
# scale_flag.  SCALE1 first queues the header (a blank line, the
# cartn_transform_axes item, "loop_" and the twelve matrix/vector tags in
# row order); every SCALEn line then queues one row of four values.
#
$1 ~ /^SCALE[123]$/ {
  scale_col1 = substr($0, 11, 10)
  scale_col2 = substr($0, 21, 10)
  scale_col3 = substr($0, 31, 10)
  scale_col4 = substr($0, 46, 10)

  if ($1 == "SCALE1") {
    sc_save[++scale_flag] = "\n"
    sc_save[++scale_flag] = "_atom_sites.cartn_transform_axes\n 'See _atom_sites.frac_transf_matrix[i][j]'\n"
    sc_save[++scale_flag] = "loop_\n"
    # tags for row r: matrix[r][1..3] followed by vector[r]
    for (sc_row = 1; sc_row <= 3; sc_row++) {
      for (sc_col = 1; sc_col <= 3; sc_col++)
        sc_save[++scale_flag] = "_atom_sites.frac_transf_matrix[" sc_row "][" sc_col "]\n"
      sc_save[++scale_flag] = "_atom_sites.frac_transf_vector[" sc_row "]\n"
    }
  }

  sc_save[++scale_flag] = scale_col1 " " scale_col2 " " scale_col3 " " scale_col4 "\n"
}
#==========================================================================
# Keyword SEQRES
#
#   seq_record_number [9-10]  = not used
#   seq_chain_id      [12]    = struct_asym.id
#                               used to obtain entity_poly_seq.entity_id
#                               see code in END block
#   seq_text          [20-72] = _entity_seq_mon_id
#   seq_number                = entity_poly_seq.num
#   seq_flag                  = num of sequence records
#
# Each SEQRES line is stored under seqres_flag.  The first time a chain
# identifier is seen it is appended to the chain list threaded through
# ent_poly_point[] (the list head is the key " ", the tail maps to "")
# and given the next polymer entity number.
#
$1 == "SEQRES" {
  seq_record_number = substr($0, 9, 2)
  seq_text[seqres_flag] = substr($0, 20, 53)

  # a blank chain identifier is written as "."
  seq_entity = substr($0, 12, 1)
  if (seq_entity == " ")
    seq_entity = "."
  seq_chain_id[seqres_flag] = seq_entity

  if (++ent_poly_id[seq_entity] == 1) {
    # walk to the current tail of the chain list
    prev_poly_id = " "
    next_poly_id = ent_poly_point[prev_poly_id]
    while (next_poly_id != "") {
      prev_poly_id = next_poly_id
      next_poly_id = ent_poly_point[prev_poly_id]
    }
    # append the new chain and make it the tail
    ent_poly_point[prev_poly_id] = seq_entity
    ent_poly_point[seq_entity] = ""

    # number the new polymer entity
    ++num_poly_ents
    entities[num_poly_ents]    = seq_entity
    ent_poly_num[seq_entity]   = num_poly_ents
    entity_seq_num[seq_entity] = num_poly_ents
  }

  seqres_flag++
}
#=========================================================================
# Keyword SHEET
#
#
# sheet_strand_no [8-10] = _struct_sheet_range.id
#                          _struct_sheet_hbond.range_id_*
# sheet_id [12-14] = _struct_sheet.id
#                    _struct_sheet_hbond.sheet_id
#                    _struct_sheet_order.sheet_id
#                    _struct_sheet_range.sheet_id
# sheet_no_strands [15-16] = _struct_sheet.number_strands
# sheet_res_name_beg [18-20] = _struct_sheet_range.beg_label_comp_id
# sheet_chain_id_beg [22] = _struct_sheet_range.beg_label_asym_id
# sheet_res_seq_beg [23-27] = _struct_sheet_range.beg_label_seq_id
# sheet_res_name_end [29-31] = _struct_sheet_range.end_label_comp_id
# sheet_chain_id_end [33] = _struct_sheet_range.end_label_asym_id
# sheet_res_seq_end [34-38] = _struct_sheet_range.end_label_seq_id
# sheet_sense [39-40] = _struct_sheet_order.sense
# sheet_atom_name_reg_1 [42-45] =
#          _struct_sheet_hbond.range_1_beg_label_atom_id
# sheet_res_name_reg_1 [46-48]
#                              =
#          _struct_sheet_hbond.range_1_beg_seq_id
# sheet_chain_id_reg_1 [50] =
#          _struct_sheet_hbond.range_1_beg_asym_id
# sheet_res_seq_reg_1 [51-55] =
#          _struct_sheet_hbond.range_1_beg_label_seq_id
# sheet_atom_name_reg_2 [57-60] =
#          _struct_sheet_hbond.range_2_beg_label_atom_id
# sheet_res_name_reg_2 [61-63] =
#          _struct_sheet_hbond.range_2_beg_seq_id
# sheet_chain_id_reg_2 [65] =
#          _struct_sheet_hbond.range_2_beg_asym_id
# sheet_res_seq_reg_2 [66-70] =
#          _struct_sheet_hbond.range_2_beg_label_seq_id
#
# *** note: The hbond.range_*_end values will be set to the hbond_range*_beg
#           values, since the PDB format provides only one sample hydrogen
#           bond for registration
#
# Each SHEET line is stored under the next value of sheet_flag.  After
# parsing, blank chain identifiers become "." and blank registration
# fields become the " . " placeholder; the sense codes " 1" / "-1" are
# translated to "parallel" / "anti-parallel".
{
  if ($1 == "SHEET") {

# Parse field
    sheet_strand_no[++sheet_flag]    = substr( ($0), 8, 3)+0
    sheet_id[sheet_flag]             = substr( ($0),12, 3)
    sheet_no_strands[sheet_flag]     = substr( ($0),15, 2)+0
    sheet_res_name_beg[sheet_flag]   = substr( ($0),18, 3)
    sheet_chain_id_beg[sheet_flag]   = substr( ($0),22, 1)
    sheet_res_seq_beg[sheet_flag]    = substr( ($0),23, 5)
    sheet_res_name_end[sheet_flag]   = substr( ($0),29, 3)
    sheet_chain_id_end[sheet_flag]   = substr( ($0),33, 1)
    sheet_res_seq_end[sheet_flag]    = substr( ($0),34, 5)
    sheet_sense[sheet_flag]          = substr( ($0),39, 2)
    # squeezname() is defined elsewhere in this file
    sheet_atom_name_reg_1[sheet_flag] = squeezname(substr( ($0),42, 4))
    sheet_res_name_reg_1[sheet_flag] = substr( ($0),46, 3)
    sheet_chain_id_reg_1[sheet_flag] = substr( ($0),50, 1)
    sheet_res_seq_reg_1[sheet_flag]  = substr( ($0),51, 5)
    sheet_atom_name_reg_2[sheet_flag] = squeezname(substr( ($0),57, 4))
    sheet_res_name_reg_2[sheet_flag] = substr( ($0),61, 3)
    sheet_chain_id_reg_2[sheet_flag] = substr( ($0),65, 1)
    sheet_res_seq_reg_2[sheet_flag]  = substr( ($0),66, 5)

# Blank chain identifiers of the strand range
    if (sheet_chain_id_beg[sheet_flag] == " ")
      sheet_chain_id_beg[sheet_flag] = "."
    if (sheet_chain_id_end[sheet_flag] == " ")
      sheet_chain_id_end[sheet_flag] = "."

# Sense code to text
    if (sheet_sense[sheet_flag] == " 1")
      sheet_sense[sheet_flag] = "parallel"
    if (sheet_sense[sheet_flag] == "-1")
      sheet_sense[sheet_flag] = "anti-parallel"

# Registration, first partner: blank or missing fields get a placeholder
    if (sheet_atom_name_reg_1[sheet_flag] == " " || \
        sheet_atom_name_reg_1[sheet_flag] == "")
      sheet_atom_name_reg_1[sheet_flag] = " . "
    if (sheet_res_name_reg_1[sheet_flag] == " " || \
        sheet_res_name_reg_1[sheet_flag] == "")
      sheet_res_name_reg_1[sheet_flag] = " . "
    if (sheet_chain_id_reg_1[sheet_flag] == " ")
      sheet_chain_id_reg_1[sheet_flag] = "."
    # the assignment operator was missing here, so the placeholder was
    # never stored
    if (sheet_res_seq_reg_1[sheet_flag] == " " || \
        sheet_res_seq_reg_1[sheet_flag] == "")
      sheet_res_seq_reg_1[sheet_flag] = " . "

# Registration, second partner
    if (sheet_atom_name_reg_2[sheet_flag] == " " || \
        sheet_atom_name_reg_2[sheet_flag] == "")
      sheet_atom_name_reg_2[sheet_flag] = " . "
    if (sheet_res_name_reg_2[sheet_flag] == " " || \
        sheet_res_name_reg_2[sheet_flag] == "")
      sheet_res_name_reg_2[sheet_flag] = " . "
    if (sheet_chain_id_reg_2[sheet_flag] == " ")
      sheet_chain_id_reg_2[sheet_flag] = "."
    # same missing assignment as for the first partner
    if (sheet_res_seq_reg_2[sheet_flag] == " " || \
        sheet_res_seq_reg_2[sheet_flag] == "")
      sheet_res_seq_reg_2[sheet_flag] = " . "
  }
}
#==========================================================================
# Keyword SITE
#
# site_seq_no [8-10] = _struct_site_gen_id
# site_id [12-14] = _struct_site_id
#                 = _struct_site_gen_site_id
# site_no_res [16-17] =
# site_res_name_1 [19-21] = _struct_site_gen_label_comp_id
# site_res_name_2 [30-32]
# site_res_name_3 [41-43]
# site_res_name_4 [52-54]
# site_chain_id_1 [23] = _struct_site_gen_label_asym_id
# site_chain_id_2 [34]
# site_chain_id_3 [45]
# site_chain_id_4 [56]
# site_res_seq_1 [24-27] = _struct_site_gen_label_seq_id
# site_res_seq_2 [35-38]
# site_res_seq_3 [46-49]
# site_res_seq_4 [57-60]
#
# Up to four residues per SITE line are stored under site_flag, which is
# advanced at the end of the rule.  site_flag_1 is set to 1 so that the
# CRYST1 processing knows SITE records were seen.
{
  if ($1 == "SITE") {
    site_flag_1 = 1
    site_seq_no[site_flag]     = substr( ($0), 8,3)
    site_id[site_flag]         = substr( ($0), 12,3)
    site_no_res[site_flag]     = substr( ($0), 16,2)
    site_res_name_1[site_flag] = substr( ($0), 19,3)
    site_res_name_2[site_flag] = substr( ($0), 30,3)
    site_res_name_3[site_flag] = substr( ($0), 41,3)
    site_res_name_4[site_flag] = substr( ($0), 52,3)
    site_chain_id_1[site_flag] = substr( ($0), 23,1)
    site_chain_id_2[site_flag] = substr( ($0), 34,1)
    site_chain_id_3[site_flag] = substr( ($0), 45,1)
    site_chain_id_4[site_flag] = substr( ($0), 56,1)
    site_res_seq_1[site_flag]  = substr( ($0), 24,4)
    site_res_seq_2[site_flag]  = substr( ($0), 35,4)
    site_res_seq_3[site_flag]  = substr( ($0), 46,4)
    site_res_seq_4[site_flag]  = substr( ($0), 57,4)

# blank chain identifiers are written as "."
    if (site_chain_id_1[site_flag] == " ") site_chain_id_1[site_flag] = "."
    if (site_chain_id_2[site_flag] == " ") site_chain_id_2[site_flag] = "."
    if (site_chain_id_3[site_flag] == " ") site_chain_id_3[site_flag] = "."
    if (site_chain_id_4[site_flag] == " ") site_chain_id_4[site_flag] = "."
    ++site_flag
  }
}
#===========================================================================
# Keyword SOURCE
#
# The PDB describes the source of all components of the structure here
# and has no way to parse out the individual entities, therefore it
# is classified under _chemical_compound_source, however _entity_source
# is more appropriate.
#
# source_cont [9-10]
# source_text [11-70] = _chemical.compound_source
#
# A line with a blank continuation field opens the
# _chemical.compound_source text field; continuation lines only append
# their text.  The closing ";" is not written by this rule.
{
  if ($1 == "SOURCE") {
    source_cont = substr( ($0), 9, 2)
    source_text = substr( ($0), 11, 60)
    # typeset() is defined elsewhere in this file
    if (convtext == "yes")
      source_text = typeset(source_text)
    if (source_cont == " ") {
      printf ("_chemical.compound_source \n; %60s \n", source_text)
      ++flag
    } else {
      printf ("%60s \n", source_text)
      ++flag
    }
  }
}
#===========================================================================
# Keyword SSBOND
#
# "disulf" = _struct_conn_type.id
#          = _struct_conn.conn_type_id
# "defined by user in PDB file" = _struct_conn_type.criteria
# " ? " = _struct_conn_type.reference
# ssflag = _struct_conn.id
# ssbond_num [8-10] =
# ssbond_res_name_beg [12-14] = _struct_conn.ptnr1_label_comp_id
# ssbond_chain_id_beg [16] = _struct_conn.ptnr1_label_asym_id
# ssbond_res_seq_num_beg [18-21] = _struct_conn.ptnr1_label_seq_id
# n/a = _struct_conn.ptnr1_label_alt_id
# ssbond_res_name_end [26-28] = _struct_conn.ptnr2_label_comp_id
# ssbond_chain_id_end [30] = _struct_conn.ptnr2_label_asym_id
# ssbond_res_seq_num_end [32-35] = _struct_conn.ptnr2_label_seq_id
# n/a = _struct_conn.ptnr2_label_alt_id
# ssbond_comment [41-70] = _struct_conn_special_details
#
# Each SSBOND line is stored under ssbond_flag, which is advanced at the
# end of the rule; blank chain identifiers and a blank comment become ".".
{
  if ($1 == "SSBOND") {
    ssbond_num[ssbond_flag]             = substr( ($0), 8, 3)
    ssbond_res_name_beg[ssbond_flag]    = substr( ($0),12, 3)
    ssbond_chain_id_beg[ssbond_flag]    = substr( ($0),16, 1)
    ssbond_res_seq_num_beg[ssbond_flag] = substr( ($0),18, 4)
    ssbond_res_name_end[ssbond_flag]    = substr( ($0),26, 3)
    ssbond_chain_id_end[ssbond_flag]    = substr( ($0),30, 1)
    ssbond_res_seq_num_end[ssbond_flag] = substr( ($0),32, 4)
    ssbond_comment[ssbond_flag]         = substr( ($0),41,30)
    if (ssbond_chain_id_beg[ssbond_flag] == " ")
      ssbond_chain_id_beg[ssbond_flag] = "."
    if (ssbond_chain_id_end[ssbond_flag] == " ")
      ssbond_chain_id_end[ssbond_flag] = "."
    if (ssbond_comment[ssbond_flag] == " ")
      ssbond_comment[ssbond_flag] = "."
    # NOTE(review): ss_flag is also the storage index used by the TURN
    # rule and is tested by the CRYST1 rule; confirm that advancing it
    # for SSBOND records is intended.
    ++ss_flag
    ++ssbond_flag
  }
}
#=======================================================================
# Keyword TER
#
# Used here to increment the number of entities found as polypeptide
# chains or DNA strands. See ATOM/HETATM for rest of processing.
#
# ter_num [7-11] =
# ter_res_name [18-20] =
# ter_chain_id [22] =
# ter_res_seq_num[23-26] =
#
# None of the columns above is read: the rule only marks the current
# atom_flag position in ter_chain_id[] and counts the record.
{
  if ($1 == "TER") {
    ter_chain_id[atom_flag] = "yes"
    ter_flag++
  }
}
#=======================================================================
# Keyword TURN
#
# From the PDB file it is not possible to determine _struct_conf.conf_type_id
#
# * indicates a _struct_topol data item which is not currently used
#
# "TURN" = _struct_conf.conf_type_id &
# unknown = _struct_conf_conf_type_id
# "From PDB" = _struct_conf_type_criteria
# "?"
= _struct_conf_type_reference # "Turn" = _struct_topol_type * # "From PDB" = _struct_topol_criteria * # turn_num [8-10] = _struct_conf.id # turn_id [12-14] = _struct_topol_id * # turn_res_name_beg [16-18] = _struct_conf.beg_label_comp_id # turn_chain_id_beg [20] = _struct_conf.beg_label_asym_id # turn_res_seq_beg [21-24] = _struct_conf.beg_label_seq_id # turn_res_name_end [27-29] = _struct_conf.end_label_comp_id # turn_chain_id_end [31] = _struct_conf.end_label_asym_id # turn_res_seq_end [32-35] = _struct_conf.end_label_seq_id # turn_comment [41-70] = _struct_conf.details { if ($1 == "TURN") { # parse field turn_num[ss_flag] = substr( ($0), 8, 3) turn_id[ss_flag] = substr( ($0),12, 3) turn_res_name_beg[ss_flag] = substr( ($0),16, 3) turn_chain_id_beg[ss_flag] = substr( ($0),20, 1) turn_res_seq_beg[ss_flag] = substr( ($0),21, 5) turn_res_name_end[ss_flag] = substr( ($0),27, 3) turn_chain_id_end[ss_flag] = substr( ($0),31, 1) turn_res_seq_end[ss_flag] = substr( ($0),32, 5) turn_comment[ss_flag] = substr( ($0),41,30) # strip blanks from id num_x = split(turn_id[ss_flag],xxx," ") turn_id[ss_flag] = "" if (num_x == 1) turn_id[ss_flag] = xxx[1] if (num_x == 2) turn_id[ss_flag] = (xxx[1] "_" xxx[2]) if (turn_chain_id_beg[ss_flag] == " ") turn_chain_id_beg[ss_flag] = "." if (turn_chain_id_end[ss_flag] == " ") turn_chain_id_end[ss_flag] = "." ++ss_flag ++turn_flag } } #========================================================================== # keyword CRYST1 # # Before processing CRYST1 output accumulated info from HELIX # and TURN records. # { if ($1 == "CRYST1") { if (ss_flag > 1 && ss_flag_2 == "1" ) { printf ("\nloop_ \n") printf ("_struct_conf_type.id\n") printf ("_struct_conf_type.criteria\n") printf ("_struct_conf_type.reference\n") for (i=1; i < helix_flag; ++i) { ++h_class_count[helix_class[i]] if (h_class_count[helix_class[i]] == 1) { printf (" %s 'From PDB' . \n", helix_class[i]) } } if (turn_flag > 1 && turn_flag_2 == "1") { printf (" TURN 'From PDB' . 
\n") ++turn_flag_2 } ++ss_flag_2 } if (ss_flag > 1 && ss_flag_2 == "2" ) { printf ("\nloop_ \n") printf ("_struct_conf.id\n") printf ("_struct_conf.conf_type_id\n") printf ("_struct_conf.beg_label_comp_id\n") printf ("_struct_conf.beg_label_asym_id\n") printf ("_struct_conf.beg_label_seq_id\n") printf ("_struct_conf.end_label_comp_id\n") printf ("_struct_conf.end_label_asym_id\n") printf ("_struct_conf.end_label_seq_id\n") printf ("_struct_conf.details\n") ++ss_flag_2 } # start with helix records for (i=1; i < helix_flag; ++i) { xxx[1]="" num_x = split(helix_comment[i],xxx," ") helix_comment[i] = xxx[1] for (j=2; j <= num_x; ++j) { helix_comment[i] = (helix_comment[i] " " xxx[j]) } printf ("helix_%-3s %-12s %3s %1s %5s %3s %1s %5s '%s'\n", \ helix_id[i], helix_class[i], helix_res_name_beg[i], \ helix_chain_id_beg[i], helix_res_seq_beg[i], \ helix_res_name_end[i],\ helix_chain_id_end[i], helix_res_seq_end[i], \ helix_comment[i]) } j = helix_flag # and now turn records k = j + turn_flag -1 for (i=j; i < k; ++i ) { xxx[1]="" num_x = split(turn_comment[i],xxx," ") turn_comment[i] = xxx[1] for (l=2; l <= num_x; ++l) { turn_comment[i] = (turn_comment[i] " " xxx[l]) } printf ("turn_%-3s TURN %3s %1s %5s %3s %1s %5s '%s'\n", \ turn_id[i], turn_res_name_beg[i], turn_chain_id_beg[i], \ turn_res_seq_beg[i], turn_res_name_end[i], \ turn_chain_id_end[i], turn_res_seq_end[i], \ turn_comment[i] ) } } } # # Now output site information # { if ($1 == "CRYST1") { if(site_flag_1 == "1") { printf ("\nloop_\n") printf ("_struct_site.id\n") printf ("_struct_site.details\n") site_flag_1 = 2 for (i=1; i < site_flag; ++i) printf (" %3s ?\n", site_id[i]) } if(site_flag_1 == "2") { printf ("\nloop_\n") printf ("_struct_site_gen.id\n") printf ("_struct_site_gen.site_id\n") printf ("_struct_site_gen.label_comp_id\n") printf ("_struct_site_gen.label_asym_id\n") printf ("_struct_site_gen.label_seq_id\n") printf ("_struct_site_gen.label_alt_id\n") printf ("_struct_site_gen.symmetry\n") printf 
("_struct_site_gen.details\n") site_flag_1 = 3} site_num = 1 for (i=1; i < site_flag; ++i) { printf ( "%3s %3s %3s %1s %5s . 1_555 . \n", \ site_num, site_id[i], site_res_name_1[i], \ site_chain_id_1[i],site_res_seq_1[i]) ++site_num if (site_res_name_2[i] != " ") { printf ( "%3s %3s %3s %1s %5s . 1_555 . \n", \ site_num, site_id[i], site_res_name_2[i], \ site_chain_id_2[i],site_res_seq_2[i]) ++site_num } if (site_res_name_3[i] != " ") { printf ( "%3s %3s %3s %1s %5s . 1_555 . \n", \ site_num, site_id[i], site_res_name_3[i], \ site_chain_id_3[i],site_res_seq_3[i]) ++site_num } if (site_res_name_4[i] != " ") { printf ( "%3s %3s %3s %1s %5s . 1_555 . \n", \ site_num, site_id[i], site_res_name_4[i], \ site_chain_id_4[i],site_res_seq_4[i]) ++site_num } } } } # # Contains a b c alpha beta gamma SG Z # { if ($1 == "CRYST1") { # calculate cell volume { ca = cos($5 * 0.0174532) cb = cos($6 * 0.0174532) cc = cos($7 * 0.0174532) cz = (1.0 - (ca*ca - cb*cb - cc*cc) + (2.0*ca*cb*cc)) vol = ($2 * $3 * $4 * (sqrt(cz))) } # localize space group and Z { sg = substr( ($0), 56, 10) Z = substr( ($0), 67, 4 ) } printf ("\n") printf ("_cell.length_a %6.3f \n", $2) printf ("_cell.length_b %6.3f \n", $3) printf ("_cell.length_c %6.3f \n", $4) printf ("_cell.angle_alpha %6.3f \n", $5) printf ("_cell.angle_beta %6.3f \n", $6) printf ("_cell.angle_gamma %6.3f \n", $7) printf ("_cell.volume %10.1f \n", vol) printf ("_cell.details ? \n") printf ("_cell.Z_PDB %3d \n\n", Z) printf ("_symmetry.space_group_name_H-M ' %10s' \n\n", sg) if (verbose) { printf ("_cell_measurement.temp ? \n") printf ("_cell_measurement.theta_min ? \n") printf ("_cell_measurement.theta_max ? \n") printf ("_cell_measurement.wavelength ? \n") printf ("_cell_measurement.pressure ? \n") printf ("_cell_measurement.radiation ? \n") printf ("_cell_measurement.reflns_used ? 
\n\n") printf ("loop_\n") printf ("_cell_measurement_refln.index_h \n") printf ("_cell_measurement_refln.index_k \n") printf ("_cell_measurement_refln.index_l \n") printf ("_cell_measurement_refln.theta \n") printf (" ? ? ? ? \n") } } } END { # check seqres info for duplicates for (i=1; i <= num_poly_ents; ++i) { seq_chain[i] = "" seq_sig[i] = "0000000000" } for (i=1; i <= num_poly_ents; ++i) { entities_list[i] = ("Chain: " entities[i]) } for (is=1; is < seqres_flag; ++is) { num_seq = split(seq_text[is],seq_res," ") for (i=1; i <= num_seq; ++i) { k=ent_poly_num[seq_chain_id[is]] cur_res=seq_res[i] found = "no" for (j in na_list) { if (na_list[j] == cur_res) { if (substr(entities_list[k],1,3) == "Pro") { entities_list[k] = ("Protein/Nucleic Acid chain: " entities[k]) } else { entities_list[k] = ("Nucleic Acid chain: " entities[k]) } found = "yes" break } } if (found == "no") { for (j in aa_list) { if (aa_list[j] == cur_res) { if (index(entities_list[k],"Nuc") > 0) { entities_list[k] = ("Protein/Nucleic chain: " entities[k]) } else { entities_list[k] = ("Protein chain: " entities[k]) } break } } } if (res_code[cur_res] == "") { ++num_res_name if(num_res_name > length(charx)) num_res_name -= length(charx) res_code[cur_res] = substr(charx,num_res_name,1) } seq_chain[ent_poly_num[seq_chain_id[is]]] = \ (seq_chain[ent_poly_num[seq_chain_id[is]]] res_code[cur_res]) } } for (i=1; i <= num_poly_ents; ++i) { for (j=1; j < length(seq_chain[i]); ++j) { code_pair = substr(seq_chain[i],j,2) if (pair_code[code_pair] == "") { ++num_res_pair if(num_res_pair > length(numl)) num_res_pair -= length(numl) pair_code[code_pair] = substr(numl,num_res_pair,1) } seq_dig = substr(seq_sig[i],pair_code[code_pair]+1,1) seq_dig = seq_dig+1 if (seq_dig > 10) seq_dig -= 10 seq_sig[i] = \ (substr(seq_sig[i],1,pair_code[code_pair]) seq_dig \ substr(seq_sig[i],pair_code[code_pair]+2, \ length(seq_sig[i])-pair_code[code_pair]-1)) } } for (i=1; i <= num_poly_ents; ++i) { el_point[i] = 
length(entities_list[i]) } for (i=1; i < num_poly_ents; ++i) { for (j=i+1; j <= num_poly_ents; ++j) { xcomp=\ seq_comp(seq_sig[i],seq_sig[j],seq_match) if (seq_sig[i] == seq_sig[j]) { if (seq_chain[i] == seq_chain[j]) { entity_seq_num[entities[j]] = entity_seq_num[entities[i]] el_point[i] += 3 xtemp = ", " if (el_point[i] >= 70) { xtemp = ",\n " el_point[i] = 3 } entities_list[i] = \ (entities_list[i] xtemp entities[j]) } } else { if ( xcomp > 85 ) { printf("# **** WARNING **** approx %5d%% homology %3s to %3s \n",\ int(xcomp), entities[i], entities[j] ) } } } } # build a complete _entity list from ATOM HETATM and FORMUL records num_ents = num_poly_ents+1 next_non_poly = " " next_non_poly = ent_non_poly_point[next_non_poly] while (next_non_poly != "") { if (entity_seq_num[next_non_poly] == "" || \ entity_seq_num[next_non_poly]+0 > num_poly_ents ) { entities[num_ents] = next_non_poly entity_seq_num[next_non_poly] = num_ents ++num_ents } next_non_poly = ent_non_poly_point[next_non_poly] } { # Process AUDIT information printf ("\n\n\n") printf ("####################\n") printf ("# #\n") printf ("# AUDIT #\n") printf ("# #\n") printf ("####################\n\n\n") printf("_audit.revision_id %4s\n",aud_rev_id) null = split( head_dep_date, dmy, "-") printf("_audit.creation_date %4s-%2s-%2s\n",\ yyyy[dmy[3]+0],mmm2mm[dmy[2]],dmy[1]) printf("_audit.update_record\n; ") if (audit_flag > 1 ) { for (j=1; j < audit_flag; ++j) { i = audit_point[audit_flag-j] printf("%4s-%2s-%2s PDB revision %5s\n " ,\ rev_date_year[i], rev_date_mon[i], \ rev_date_day[i],rev_name[i]) } # end for (j=1; j < audit_flag; ++j) } # end if (audit_flag > 1 ) #******** WARNING ******* Correct operation of the next two lines ######## # is system-dependent, see alternatives after # system \ ("/bin/echo `date +%Y-%m-%d` Converted to mmCIF format by pdb2cif") # If the prior line two lines cause an error message on your system, # # First try changing the "+%Y" to "+19%y" and if that fails, just # # comment out 
the two lines and forget it. All they do is add a trace # # of when the conversion was done # ########################################################################### printf(";") } for (i=1; i <= keywrd_flag; ++i ) { printf (key_save[i]) } # Print out _struct_biol_info if (verbose) { printf ("\nloop_\n") printf ("_struct_title\n") printf ("_struct_keywords\n") printf (" '%60s', ?\n", compnd[1]) printf ("\nloop_\n") printf ("_struct_biol.id\n") printf ("_struct_biol.details\n") printf (" %4s ' %40s' \n", head_PDB_code, head_funct_class) printf ("\nloop_\n") printf ("_struct_biol_gen.biol_id\n") printf ("_struct_biol_gen.asym_id\n") printf ("_struct_biol_gen.symmetry\n") printf ("_struct_biol_gen.details\n") for (i=1; i < num_ents; ++i) { printf (" %4s %4s 1_555 ?\n", head_PDB_code, entities[i]) } # end for (i=1; i < num_ents; ++i) printf ("\nloop_\n") printf ("_struct_biol_keywords.biol_id\n") printf ("_struct_biol_keywords.text\n") printf (" %4s ?\n", head_PDB_code) printf ("\nloop_\n") printf ("_struct_biol_view.biol_id\n") printf ("_struct_biol_view.id\n") printf ("_struct_biol_view.details\n") printf ("_struct_biol_view.rot_matrix[1][1]\n") printf ("_struct_biol_view.rot_matrix[1][2]\n") printf ("_struct_biol_view.rot_matrix[1][3]\n") printf ("_struct_biol_view.rot_matrix[2][1]\n") printf ("_struct_biol_view.rot_matrix[2][2]\n") printf ("_struct_biol_view.rot_matrix[2][3]\n") printf ("_struct_biol_view.rot_matrix[3][1]\n") printf ("_struct_biol_view.rot_matrix[3][2]\n") printf ("_struct_biol_view.rot_matrix[3][3]\n\n") printf (" %4s 1 ? ? ? ? ? ? ? ? ? 
?\n", head_PDB_code) printf ("\nloop_\n") printf ("_struct_asym.id\n") printf ("_struct_asym.entity_id\n") printf ("_struct_asym.details\n") for (i=1; i < num_ents; ++i) { printf (" %4s %4s ?\n", \ entities[i], entity_seq_num[entities[i]]) } # end for (i=1; i < num_ents; ++i) } # end if (verbose) # Information about entities which are polymers if (seqres_flag+0 > 1) { printf("\n") printf("##########################\n") printf("# #\n") printf("# ENTITY_POLY_SEQ #\n") printf("# #\n") printf("##########################\n\n") printf ("loop_ \n") printf ("_entity_poly_seq.entity_id\n") printf ("_entity_poly_seq.num\n") printf ("_entity_poly_seq.mon_id\n") iscount = 0 iscurr = seq_chain_id[1] seq_number = 1 seq_start_num[entity_seq_num[iscurr]] = 1 for (is=1; is < seqres_flag; ++is) { if (ent_poly_num[seq_chain_id[is]] == \ entity_seq_num[seq_chain_id[is]]) { if (iscurr != seq_chain_id[is]) { seq_start_num[entity_seq_num[seq_chain_id[is]]] = seq_number if (iscount > 0) { printf ("\n") iscount = 0 } } iscurr = seq_chain_id[is] num_seq = split(seq_text[is],seq_res," ") for (i=1; i <= num_seq; ++i) { printf (" %5d %4s %3s",\ ent_poly_num[seq_chain_id[is]], \ seq_number, seq_res[i]) ++seq_number ++iscount if (iscount > 4) { printf ("\n") iscount = 0 } } } } if (iscount > 0) printf ("\n") } # # print _entity info combining HET ATOM and HETATM records printf ("\nloop_\n") printf ("_entity.id\n") printf ("_entity.type\n") printf ("_entity.details\n") for (i=1; i <= num_poly_ents; ++i) { if (ent_poly_num[entities[i]] == \ entity_seq_num[entities[i]]) { printf (" %5d polymer\n; %s\n;\n", \ entity_seq_num[entities[i]],entities_list[i]) } } for (i=num_poly_ents+1; i < num_ents; ++i) { if ( entities[i] != "HOH" && entities[i] != "DOD") { printf (" %5d non-polymer 'het group %s'\n", \ entity_seq_num[entities[i]],entities[i] ) } else { printf (" %5d water '%s' \n", \ entity_seq_num[entities[i]],entities[i] ) } } printf("\nloop_\n") printf("_struct_asym.entity_id\n") 
printf("_struct_asym.id\n") for (i=1; i < num_ents; ++i) { if ((i > num_poly_ents) ||(ent_poly_num[entities[i]] == \ entity_seq_num[entities[i]])) { printf (" %5d %s\n", entity_seq_num[entities[i]],entities[i] ) } } if (verbose) { printf ("\nloop_\n") printf ("_entity_name_com.entity_id\n") printf ("_entity_name_com.name\n") for (i=1; i <= num_poly_ents; ++i) { printf (" %3s ?\n", \ entities[i]) } for (i=num_poly_ents+1; i < num_ents; ++i) { printf (" %3s ?\n", \ entities[i]) } } if (verbose) { printf ("\nloop_\n") printf ("_entity_keywords.entity_id\n") printf ("_entity_keywords_text\n") for (i=1; i <= num_poly_ents; ++i) { printf (" %3s ? \n", entity_seq_num[entities[i]]) } for (i=num_poly_ents+1; i < num_ents; ++i) { printf (" %3s ? \n", entity_seq_num[entities[i]]) } } if (verbose) { printf ("\nloop_\n") printf ("_entity_reference.entity_id \n") printf ("_entity_reference.database_name \n") printf ("_entity_reference.database_code \n") printf ("_entity_reference.details \n") for (i=1; i <= num_poly_ents; ++i) { if (ent_poly_num[entities[i]] == \ entity_seq_num[entities[i]]) { printf (" %3s ? ? ? \n", entity_seq_num[entities[i]]) } } for (i=num_poly_ents+1; i < num_ents; ++i) { printf (" %3s ? ? ? \n", entity_seq_num[entities[i]]) } } # # Information about entities which are polymers # printf ("\nloop_ \n") printf ("_entity_poly.entity_id\n") printf ("_entity_poly.type\n") printf ("_entity_poly.nstd_chirality\n") printf ("_entity_poly.nstd_linkage\n") printf ("_entity_poly.nstd_monomer\n") printf ("_entity_poly.type_details\n") for (i=1; i <= num_poly_ents; ++i) { printf (" %1s ? ? ? ? 
?\n", entity_seq_num[entities[i]]) } # Non-standard monomer entities described by FORMUL records if (formul_flag > 1) { printf ("\n\n\n") printf ("####################\n") printf ("# #\n") printf ("# CHEM_COMP #\n") printf ("# #\n") printf ("####################\n\n\n") printf ("loop_\n") printf ("_chem_comp.id\n") printf ("_chem_comp.mon_nstd_flag\n") printf ("_chem_comp.formula\n") mon_flag = 0 for (i=1; i < formul_flag; ++i) { if (formul_het_cont_flag[i] == " ") { if (mon_flag != 0) printf("; \n") ++mon_flag mon_ns = "yes" if( entity_seq_num[formul_het_site_symbol[i]] > num_poly_ents) \ mon_ns = "no " printf(" %3s %3s\n; %s\n", formul_het_site_symbol[i], \ mon_ns, formul_het_text[i]) } else { printf(" %s\n",formul_het_text[i]) } } # end for (i=1; i < formul_flag; ++i) if (mon_flag != 0) printf("; \n") } printf ("\n\n\n") printf ("######################\n") printf ("# #\n") printf ("# ATOM_SITES #\n") printf ("# #\n") printf ("######################\n\n\n") # PRINT ORIGX INFO for (i=1; i <= origx_flag; ++i) { printf (om_save[i]) } # PRINT SCALE INFO for (i=1; i <= scale_flag; ++i) { printf (sc_save[i]) } # PRINT MTRIX INFO for (i=1; i <= mtrix_flag; ++i) { printf (mat_save[i]) } # print alternate sites info if (at_alt) { printf ("\n\n\n") printf ("####################\n") printf ("# #\n") printf ("# ATOM_SITES_ALT #\n") printf ("# #\n") printf ("####################\n\n\n") printf ("\nloop_\n") printf ("_atom_sites_alt.id\n") printf ("_atom_sites_alt.details\n") for (i in atom_alt_list) printf(" %3s ? \n", i) if (verbose) { printf ("\n\n\n") printf ("######################\n") printf ("# #\n") printf ("# ATOM_SITES_ALT_ENS #\n") printf ("# #\n") printf ("######################\n\n\n") printf ("\nloop_\n") printf ("_atom_sites_alt_ens.id\n") printf ("_atom_sites_alt_ens.details\n") printf (" 'Ensemble 1' ? 
\n") printf ("\n\n\n") printf ("######################\n") printf ("# #\n") printf ("# ATOM_SITES_ALT_GEN #\n") printf ("# #\n") printf ("######################\n\n\n") printf ("\nloop_\n") printf ("_atom_sites_alt_gen.ens_id\n") printf ("_atom_sites_alt_gen.alt_id\n") for (i in atom_alt_list) printf(" 'Ensemble 1' %3s \n", i) } # end if (verbose) } # end if (at_alt) if (foot_flag > 0) { printf ("\n\n\n") printf ("######################\n") printf ("# #\n") printf ("# ATOM_SITES_FOOTNOTE#\n") printf ("# #\n") printf ("######################\n\n\n") for (i=1; i <= foot_flag; ++i) { printf (ft_save[i]) } printf (";\n") } # print ATOM/HETATM info if (atom_flag_1 == "1") { printf ("\n\n\n") printf ("####################\n") printf ("# #\n") printf ("# ATOM_SITE #\n") printf ("# #\n") printf ("####################\n\n\n") printf ("\nloop_\n") printf ("_atom_site.entity_seq_num\n") printf ("_atom_site.group_PDB\n") printf ("_atom_site.type_symbol \n") printf ("_atom_site.label_atom_id \n") printf ("_atom_site.label_comp_id \n") printf ("_atom_site.label_asym_id \n") printf ("_atom_site.label_seq_id \n") printf ("_atom_site.label_alt_id \n") printf ("_atom_site.cartn_x \n") printf ("_atom_site.cartn_y \n") printf ("_atom_site.cartn_z \n") printf ("_atom_site.occupancy\n") printf ("_atom_site.B_iso_or_equiv \n") printf ("_atom_site.footnote_id\n") printf ("_atom_site.entity_id\n") printf ("_atom_site.id\n") if (aniso_flag > 0) { printf ("_atom_site.aniso_U[1][1]\n") printf ("_atom_site.aniso_U[1][2]\n") printf ("_atom_site.aniso_U[1][3]\n") printf ("_atom_site.aniso_U[2][2]\n") printf ("_atom_site.aniso_U[2][3]\n") printf ("_atom_site.aniso_U[3][3]\n") } # end if (aniso_flag > 0) atom_flag_1 = 2 } # end if (atom_flag_1 == "1") prior_res_name = "" for (i=1; i < atom_flag; ++i) { if (entity_seq_num[entity_id[i]]+0 == 0) { num_ents++ entity_seq_num[entity_id[i]] = num_ents printf(\ "# *** WARNING *** At atom number %5s unidentified entity %3s found \n", atom_number[i], 
entity_id[i]) } atom_ent_seq_num[i]="." if (prior_res_name != \ (entity_id[i] residue_seq_number[i] residue_insert_ind[i])) { prior_res_name = (entity_id[i] residue_seq_number[i] residue_insert_ind[i]) x_num=split(residue_name[i],xxx," ") res_locs[prior_res_name] = (res_locs[prior_res_name] " " i) cur_res=xxx[1] if (res_code[cur_res] == "") { ++num_res_name if(num_res_name > length(charx)) num_res_name -= length(charx) res_code[cur_res] = substr(charx,num_res_name,1) } al_seq[entity_id[i]] = (al_seq[entity_id[i]] res_code[cur_res]) al_back_point[(length(al_seq[entity_id[i]]) entity_id[i])] = i } al_seq_point[i]=length(al_seq[entity_id[i]]) } for (i in al_seq) { nn_res=entity_seq_num[i] if(nn_res <= num_poly_ents) { xmat=\ seq_comp(al_seq[i],seq_chain[nn_res],seq_match) if ( xmat < 90) { printf("# *** WARNING *** only %5d%% homology to chain %3s\n", int(xmat), i) } for(j=1; j<=length(al_seq[i]); ++j) { if(seq_match[j] != 0) { k = al_back_point[( j i)] atom_ent_seq_num[k] = seq_match[j]+seq_start_num[nn_res]-1 } } } } prior_res_name="" for (i=1; i < atom_flag; ++i) { if (prior_res_name != \ (entity_id[i] residue_seq_number[i] residue_insert_ind[i])) { prior_res_name = (entity_id[i] residue_seq_number[i] residue_insert_ind[i]) if (atom_ent_seq_num[i] == ".") { x_num = split(res_locs[prior_res_name],xxx," ") if(x_num > 1) atom_ent_seq_num[i] = atom_ent_seq_num[xxx[1]] } } if (dense_list != "yes" && atom_ent_seq_num[i] == ".") { x_num = split(res_locs[prior_res_name],xxx," ") if(x_num > 0) atom_ent_seq_num[i] = atom_ent_seq_num[xxx[1]] } es = "" if (atom_ent_seq_num[i] != ".") { es = "\n "} xs = "" if (substr( atom_x[i], 1, 1) != " ") { xs = "\n "} ys = "" if (substr( atom_y[i], 1, 1) != " ") { ys = "\n "} zs = "" if (substr( atom_z[i], 1, 1) != " ") { zs = "\n "} os = "" if (substr( atom_occ[i], 1, 1) != " ") { os = "\n "} bs = "" if (substr( B_or_U[i], 1, 1) != " ") { bs = "\n "} printf \ ("%s%s %-4s %2s %4s %4s %1s %5s %1s%s%8s%s%8s%s%8s%s%6s%s%6s %2s %3d %6d\n", \ 
atom_ent_seq_num[i],es, \ substr(atom_or_het[i],1,4),atom_type[i], atom_name[i], \ residue_name[i], chain_id[i], \ (residue_seq_number[i] residue_insert_ind[i]),\ atom_alt_location[i], xs, atom_x[i], ys, atom_y[i], zs, \ atom_z[i], os, atom_occ[i], bs, B_or_U[i], \ footnote_number[i], entity_seq_num[entity_id[i]], \ atom_number[i] ) if (ansio_flag > 0) { x = aniso_point[atom_number[i]] printf (" %7g %7g %7g %7g %7g %7g \n", \ atom_U11[x]/10000, atom_U12[x]/10000, atom_U13[x]/10000 , \ atom_U22[x]/10000, atom_U23[x]/10000, atom_U33[x]/10000 ) } # end if (ansio_flag > 0) } # end for (i=1; i < atom_flag; ++i) # Process HEADER records for DATABASE_2 { printf ("\n\n\n") printf ("####################\n") printf ("# #\n") printf ("# DATABASE_2 #\n") printf ("# #\n") printf ("####################\n\n\n") printf ("_database_2.database_id PDB \n") printf ("_database_2.database_code %4s \n",head_PDB_code) } # Process REVDAT records results { if (revdat_flag != "1") { printf ("\n\n\n") printf ("####################\n") printf ("# #\n") printf ("# DATABASE_PDB_REV #\n") printf ("# #\n") printf ("####################\n\n\n") null = split( head_dep_date, dmy, "-") printf("_database_PDB_rev.date_original %4s-%2s-%2s\n",\ yyyy[dmy[3]+0],mmm2mm[dmy[2]],dmy[1]) printf ("\nloop_\n") printf ("_database_PDB_rev.num\n") printf ("_database_PDB_rev.date\n") printf ("_database_PDB_rev.mod_type\n") for (j=1; j < revdat_flag; ++j) { if ( rev_cont_flag[j] == " " ) { printf ("%3s %4s-%2s-%2s %1s \n", \ rev_mod_number[j], rev_date_year[j], rev_date_mon[j], \ rev_date_day[j], rev_type[j]) } # end if ( rev_cont_flag[j] == " " ) } pdb_reloop = 0 for (j=1; j < revdat_flag; ++j) { # Additional data items if more than initial entry rev_num_change = split( rev_rec_corr[j], rev_recs, " " ) for(k=1; k<=rev_num_change; ++k) { if (pdb_reloop == 0 ) { printf ("\nloop_\n") printf ("_database_PDB_rev_record.rev_num\n") printf ("_database_PDB_rev_record.details\n") printf ("_database_PDB_rev_record.type\n") 
pdb_reloop++ } # end if (pdb_reloop == 0 ) printf (" %3s %5s %-8s\n", rev_mod_number[j], \ rev_name[j],rev_recs[k]) } } # end for (j=1; j < revdat_flag; ++j) } # end if (revdat_flag != "1") # Process SPRSDE and OBSLTE records for (i=1; i <= s_o_flag; ++ i) { my_type = s_o_type[i] my_cont = s_o_cont[i] my_date = s_o_date[i] my_name = s_o_name[i] my_list = s_o_list[i] if (my_cont == " " ) { my_dir = "_database_PDB_rev.replaced_by" if (my_type == "SPRSDE" ) my_dir = "_database_PDB_rev.replaces" my_num = rev_mod_of_date[my_date] if (my_num == "" || my_num == " ") my_num = "?" } if (my_date == " ") my_date = " ?" my_count = split( my_list, my_revs, " ") for (j=1; j <= my_count; ++ j) { printf ("\nloop_\n") printf ("_database_PDB_rev.num\n") printf ("_database_PDB_rev.date\n") printf ((my_dir "\n")) printf ((my_num " " my_date " " my_revs[j])) } } # Process HEADER records for STRUCT_BIOL { printf ("\n\n\n") printf ("####################\n") printf ("# #\n") printf ("# STRUCT_BIOL #\n") printf ("# #\n") printf ("####################\n\n\n") printf ("_struct_biol.id %4s \n",head_PDB_code) printf ("_struct_biol.details '%s'\n",head_funct_class) } # # Process SHEET information. The PDB format make distinct sheets # out of bifurcated sheets and from sheets with broken strands. The # mmCIF format allows them to be combined. We first must identify # sheets that share strands. 
# # Note: If you convert this code to another language, be aware that # strong use has been made of the awk initialization to "" # if (sheet_flag > 0) { printf ("\n\n\n") printf ("################\n") printf ("# #\n") printf ("# STRUCT_SHEET #\n") printf ("# #\n") printf ("################\n\n") x_prior_strand = "" num_strand = 0 num_pair = 0 for (i=1; i <= sheet_flag; ++i) { strand = (sheet_res_name_beg[i] " " \ sheet_chain_id_beg[i] " " \ sheet_res_seq_beg[i] " " \ sheet_res_name_end[i] " " \ sheet_chain_id_end[i] " " \ sheet_res_seq_end[i]) ++strand_count[strand] if (strand_count[strand] == 1) { strand_list[++num_strand] = strand } my_strand_name = (sheet_id[i] "|" sheet_strand_no[i]) if (ordered_strand_name[strand] == "" || \ ordered_strand_name[strand] > my_strand_name) { ordered_strand_name[strand] = my_strand_name } if (sheet_id_list[strand] == "") { sheet_id_list[strand] = sheet_id[i] } else { sheet_id_list[strand] = (sheet_id_list[strand] "|" sheet_id[i]) } if (sheet_strand_no[i] > 1) { my_pair = (strand "|" x_prior_strand) strand_pair[my_pair] = i ++pair_count[my_pair] if (pair_count[my_pair] == 1) { pair_list[++num_pair] = my_pair } } x_prior_strand = strand if (sheet_strands[sheet_id[i]] == "") { ++num_sheet sheet_pdb_size[sheet_id[i]] = sheet_no_strands[i] sheet_strands[sheet_id[i]] = sheet_strand_no[i] } else { sheet_strands[sheet_id[i]] = (sheet_strands[sheet_id[i]]\ " " sheet_strand_no[i]) } } # prepare pointers for merge lists of sheet for (sheet_name in sheet_strands) { sheet_merge[sheet_name] = "" sheet_root[sheet_name] = sheet_name sheet_size[sheet_name] = sheet_pdb_size[sheet_name] } # merge sheets which share any strands for (strand in sheet_id_list) { num_sheet = split(sheet_id_list[strand],sheets,"|") first_sheet = sheets[1] first_sheet = sheet_root[first_sheet] for (i=2; i <= num_sheet; ++i) { target_sheet = sheets[i] target_sheet = sheet_root[target_sheet] if (first_sheet != target_sheet) { if (target_sheet < first_sheet) { temp_sheet = 
target_sheet target_sheet = first_sheet first_sheet = temp_sheet } prev_sheet = first_sheet while (target_sheet != "") { next_sheet = sheet_merge[prev_sheet] prev_sheet = next_sheet if (next_sheet > target_sheet || next_sheet == "") { prev_sheet = target_sheet target_sheet = sheet_merge[target_sheet] sheet_merge[prev_sheet] = next_sheet sheet_root[prev_sheet] = first_sheet if (sheet_size[first_sheet] < sheet_size[prev_sheet] ) { sheet_size[first_sheet] = sheet_size[prev_sheet] } } } } } } # reorganize the strand names and prepare a sorted list strand_point[" "] = "" for (is = 1; is <= num_strand; ++is) { strand = strand_list[is] num_x = split(ordered_strand_name[strand],xxx,"|") ordered_strand_name[strand]= \ (sheet_root[xxx[1]] " " xxx[2] " "xxx[1]) numy = split ( (xxx[2] " " xxx[1]), yyy, " ") strand_name[strand] = (yyy[1] "_" yyy[2]) prior_strand = " " next_strand = strand_point[" "] reloop = "yes" while (reloop == "yes") { if (ordered_strand_name[strand] < \ ordered_strand_name[next_strand] || \ next_strand == "") { strand_point[strand] = next_strand strand_point[prior_strand] = strand reloop = "no" } else { prior_strand = next_strand next_strand = strand_point[prior_strand] } } } next_strand = " " for (i=1; i <= num_strand; ++i) { next_strand = strand_point[next_strand] strand_list[i] = next_strand } # now sort pair names in the same order pair_point[" "] = "" for (ip = 1; ip <= num_pair; ++ip) { my_pair = pair_list[ip] num_x=split(my_pair,xxx,"|") x_my_pair = (ordered_strand_name[xxx[2]] " " \ ordered_strand_name[xxx[1]] ) prior_pair = " " next_pair = pair_point[" "] reloop = "yes" while (reloop == "yes") { num_y = split(next_pair,yyy,"|") y_my_pair = (ordered_strand_name[yyy[2]] " " \ ordered_strand_name[yyy[1]] ) if (x_my_pair < y_my_pair || \ next_pair == "") { pair_point[my_pair] = next_pair pair_point[prior_pair] =my_pair reloop = "no" } else { prior_pair = next_pair next_pair = pair_point[prior_pair] } } } next_pair = " " for (i=1; i <= num_pair; ++i) { 
next_pair = pair_point[next_pair] pair_list[i] = next_pair } # and sort the sheet names sheet_point[" "] = "" num_sheet = 0 for (is = 1; is <= num_strand; ++is) { strand = strand_list[is] my_num=split(sheet_id_list[strand],my_sid,"|") i = my_sid[1] my_sheet_name = sheet_root[i] ++sheet_strand_count[my_sheet_name] if (sheet_strand_count[my_sheet_name] == 1) { prior_sheet = " " next_sheet = sheet_point[" "] reloop = "yes" while (reloop == "yes") { if (my_sheet_name == next_sheet ) { reloop = "no" } else { if (my_sheet_name < next_sheet || \ next_sheet == "") { sheet_point[my_sheet_name] = next_sheet sheet_point[prior_sheet] = my_sheet_name reloop = "no" ++num_sheet } else { prior_sheet = next_sheet next_sheet = sheet_point[prior_sheet] } } } } } # At this point, the sheets indexed by names are linked via # sheet_merge, with the root sheets given by the entries for # which sheet_root points to the same name and sorted lists are # available for sheets, strands and strand pairs # printf ("loop_\n") printf ("_struct_sheet.id\n") printf ("_struct_sheet.number_strands\n") my_sheet_name = " " for (i = 1; i <= num_sheet; ++i) { my_sheet_name = sheet_point[my_sheet_name] printf ("%3s %5d\n", my_sheet_name, sheet_size[my_sheet_name]) } printf ("\nloop_\n") printf ("_struct_sheet_hbond.sheet_id\n") printf ("_struct_sheet_hbond.range_id_1\n") printf ("_struct_sheet_hbond.range_id_2\n") printf ("_struct_sheet_hbond.range_1_beg_label_seq_id\n") printf ("_struct_sheet_hbond.range_1_beg_label_atom_id\n") printf ("_struct_sheet_hbond.range_2_beg_label_seq_id\n") printf ("_struct_sheet_hbond.range_2_beg_label_atom_id\n") printf ("_struct_sheet_hbond.range_1_end_label_seq_id\n") printf ("_struct_sheet_hbond.range_1_end_label_atom_id\n") printf ("_struct_sheet_hbond.range_2_end_label_seq_id\n") printf ("_struct_sheet_hbond.range_2_end_label_atom_id\n") for (ip = 1; ip <= num_pair; ++ip) { my_pair = pair_list[ip] my_p_num = split(my_pair,my_strands,"|") strand = my_strands[1] i = 
strand_pair[my_pair] my_strand_name = strand_name[strand] if (sheet_strand_no[i] > 1) { p_strand = my_strands[2] p_strand_name = strand_name[p_strand] printf("%3s %7s %7s %5s %4s %5s %4s %5s %4s %5s %4s\n",\ sheet_root[sheet_id[i]], p_strand_name, my_strand_name, \ sheet_res_seq_reg_2[i], sheet_atom_name_reg_2[i],\ sheet_res_seq_reg_1[i], sheet_atom_name_reg_1[i],\ sheet_res_seq_reg_2[i], sheet_atom_name_reg_2[i],\ sheet_res_seq_reg_1[i], sheet_atom_name_reg_1[i] ) } } printf ("\nloop_\n") printf ("_struct_sheet_order.sheet_id\n") printf ("_struct_sheet_order.range_id_1\n") printf ("_struct_sheet_order.range_id_2\n") printf ("_struct_sheet_order.offset\n") printf ("_struct_sheet_order.sense\n") for (ip=1; ip <= num_pair; ++ip) { my_pair = pair_list[ip] my_p_num = split(my_pair,my_strands,"|") strand = my_strands[1] i = strand_pair[my_pair] my_sheet_name = sheet_root[sheet_id[i]] my_strand_name = strand_name[strand] if (sheet_strand_no[i] > 1) { p_strand = my_strands[2] j = i-1 p_sheet_name = sheet_root[sheet_id[j]] p_strand_name = strand_name[p_strand] printf("%3s %7s %7s +1 %s\n",\ sheet_root[sheet_id[i]],p_strand_name,my_strand_name,sheet_sense[i]) } } printf ("\nloop_\n") printf ("_struct_sheet_range.sheet_id\n") printf ("_struct_sheet_range.id\n") printf ("_struct_sheet_range.beg_label_comp_id\n") printf ("_struct_sheet_range.beg_label_asym_id\n") printf ("_struct_sheet_range.beg_label_seq_id\n") printf ("_struct_sheet_range.end_label_comp_id\n") printf ("_struct_sheet_range.end_label_asym_id\n") printf ("_struct_sheet_range.end_label_seq_id\n") for (is = 1; is <= num_strand; ++is ){ strand = strand_list[is] my_num=split(sheet_id_list[strand],my_sid,"|") i = my_sid[1] my_sheet_name = sheet_root[i] my_strand_name = strand_name[strand] printf("%3s %7s %s\n",\ my_sheet_name, my_strand_name, strand) } } # # print out summaries # { printf ("\n# REMARK records parsed \t= %6d;", all_remarks) printf ("# specified by PDB \t= %6d\n", total_remark) for (x = 1; x <= 10; 
++x) {
        total_ftnote_flag = total_ftnote_flag + ftnote_flag[x]
      } # end for (x = 1; x <= 10; ++x)
      printf ("# FTNOTE records parsed \t= %6s;", total_ftnote_flag)
      printf ("# specified by PDB \t= %6s\n", total_ftnote)
      printf ("# HET records parsed \t\t= %6d;", (het_flag-1))
      printf ("# specified by PDB \t= %6d\n", total_het)
      printf ("# HELIX records parsed \t\t= %6d;", (helix_flag-1))
      printf ("# specified by PDB \t= %6d\n", total_helix)
      printf ("# SHEET records parsed \t\t= %6d;", (sheet_flag))
      printf ("# specified by PDB \t= %6d\n", total_sheet)
      printf ("# TURN records parsed \t\t= %6d;", (turn_flag-1))
      printf ("# specified by PDB \t= %6d\n", total_turn)
      printf ("# SITE records parsed \t\t= %6d;", (site_flag-1))
      printf ("# specified by PDB \t= %6d\n", total_site)
      total_a_h_flag = (atom_flag -1 -ter_flag)
      printf ("# AT+HET records parsed \t= %6d;", (total_a_h_flag))
      printf ("# specified by PDB \t= %6d\n", total_a_h)
      printf ("# TER records parsed \t\t= %6d;", ter_flag )
      printf ("# specified by PDB \t= %6d\n", total_ter )
      printf ("# CONECT records parsed \t= %6d;", conect_flag_2)
      printf ("# specified by PDB \t= %6d\n", total_conect)
      printf ("# SEQRES records parsed \t= %6d;", (seqres_flag-1))
      printf ("# specified by PDB \t= %6d\n", total_seqres)
      printf ("# Total of %6d records processed from PDB file\n", NR)
    }
  }
}

#
# squeezname( str )
#
#   Fix up an atom name by squeezing out a blank in the middle.
#
#   str          the atom name to fix up
#   temp_a_name  local work variable (callers do not pass it)
#
#   If the third character is a blank, the fourth character is moved
#   up into its place and a blank is appended.  If the second character
#   is (then) a blank, the name is shifted right by one position behind
#   a leading blank.  Returns the adjusted name.
#
function squeezname( str,    temp_a_name) {
  temp_a_name = str
  # blank in column 3: close the gap and pad on the right
  if (substr( temp_a_name, 3, 1) == " ") {
    temp_a_name = (substr( temp_a_name, 1, 2) \
                   substr( temp_a_name, 4, 1) " " )
  }
  # blank in column 2: shift right behind a leading blank
  if (substr( temp_a_name, 2, 1) == " ") {
    temp_a_name = (" " substr( temp_a_name, 1, 1) \
                   substr( temp_a_name, 3, 2) )
  }
  # a blank name is replaced by a "." placeholder
  if (temp_a_name == " ") temp_a_name = " . "
  return(temp_a_name)
}

#
# typeset( str )
#
#   Convert a string to mixed case, honouring the case-control
#   characters "*", "$" and "/".
#
#   str     the text to convert
#   lostr, lstr, mystr, pchar, i, mychar
#           local work variables (callers do not pass them)
#
#   The text is first lowercased.  A character is then uppercased when
#   the previous input character is a blank (also assumed before the
#   first character), ",", ".", "(", "*" or "/".  The characters "*",
#   "$" and "/" are dropped from the output unless they repeat the
#   previous character.  Once a "/" has been seen it stays the
#   "previous character" until a "$" or "-" arrives, so everything in
#   between is uppercased.  Returns the converted text.
#
#   mychar is now declared local; it used to leak into the global
#   namespace on every call.
#
function typeset( str,    lostr,lstr,mystr,pchar,i,mychar) {
  lostr = tolower(str)
  lstr = length(lostr)
  mystr = ""
  pchar = " "
  for( i=1; i <= lstr; ++i) {
    mychar = substr( lostr, i, 1)
    if( pchar == " " || \
        pchar == "," || \
        pchar == "." || \
        pchar == "(" || \
        pchar == "*" || \
        pchar == "/" ) {
      mychar = toupper(mychar)
    }
    # control characters are only emitted when doubled
    if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
        || (mychar == pchar) )
      mystr = (mystr mychar)
    # "/" is sticky: only "$" or "-" may replace it as previous char
    if( pchar == "/" ){
      if( mychar == "$" || mychar == "-" ) {
        pchar = mychar
      }
    } else
      pchar = mychar
  }
  return(mystr)
}

#
# seq_comp( seqal, seqsr, seq_match )
#
#   Greedily match the sequence string seqal against the sequence
#   string seqsr.
#
#   seqal      sequence to be matched (one character per position)
#   seqsr      sequence to search in
#   seq_match  array filled in by this function: seq_match[k] is the
#              position in seqsr matched to position k of seqal, or 0
#              when position k could not be matched
#   lsal, lssr, lm, isa, isr, ksa, nm, sm, cr
#              local work variables (callers do not pass them)
#
#   Starting at the current position isa in seqal, the longest
#   remaining piece is looked up in the part of seqsr from isr on.
#   While no match is found the piece is shortened: by the factor .707
#   while it is longer than 16 characters, one character at a time
#   after that.  Only matched pieces longer than 3 characters count
#   towards the score.
#
#   Returns the percentage (0-100) of seqal covered by counted matches;
#   0 is returned for an empty seqal (this used to be a fatal division
#   by zero).
#
#   The local list used to name "isha" where the body uses "isa", so
#   isa, and likewise sm and cr, leaked into the global namespace.
#   They are now true locals.
#
function seq_comp(seqal,seqsr,seq_match,\
    lsal,lssr,lm,isa,isr,ksa,nm,sm,cr) {
  lsal = length(seqal)
  lssr = length(seqsr)
  # nothing to match: avoid dividing by zero below
  if (lsal == 0) return(0)
  isa = 1
  isr = 1
  nm = 0
  while ( isa <= lsal) {
    seq_match[isa] = 0
    sm = 0
    lm = lsal - isa + 1
    while ( lm > 0 && sm == 0 ) {
      cr = substr(seqal, isa, lm)
      sm = index(substr(seqsr,isr,lssr-isr+1),cr)
      if (sm > 0) {
        # record the seqsr position of every matched character
        for (ksa=0; ksa < length(cr); ++ksa) {
          seq_match[isa] = sm+ksa+isr-1
          ++isa
        }
        # NOTE(review): isr advances by the match length only, not by
        # the match offset sm as well -- confirm this is intended
        isr += length(cr)
        if (length(cr) > 3) nm += length(cr)
        break
      } else {
        if (lm > 16) {
          lm = int(lm * .707)
        } else {
          --lm
        }
      }
    }
    # no piece starting here matched at all: skip this position
    if (sm==0) ++isa
  }
  return(100*(nm/lsal))
}