%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structural bioinformatics books %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Book{Bourne03, author = "P.E. Bourne and H. Weissig", title = "Structural Bioinformatics", publisher = "{W}iley-{L}iss", location = "Hoboken, NJ", year = "2003", url = "http://www.amazon.com/gp/product/product-description/0471201995", comment = "This is a good introductory book on structural bioinformatics. It practical rather than theoretical - it reviews the main sources of structural data (e.g., PDB, NDB, etc.) and surveys the most popular methods used for predicting structure, aligning structures, predicting function, etc.", } @Book{Orengo04, author = "C.A. Orengo and D.T. Jones and J.M. Thornton", title = "Bioinformatics: Genes, Proteins, \& Computers", publisher = "BIOS Scientific Publishers", location = "Abingdon, UK", year = "2004", url = "http://www.amazon.com/exec/obidos/tg/detail/-/1859960545/ref=olp_product_details/104-9907891-5367125?%5Fencoding=UTF8&v=glance", comment = "This is a good book on bioinformatics, with particular emphasis on structural bioinformatics.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Cheminformatics books %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Book{Gasteiger, author = "J. Gasteiger and T. Engel", title = "Cheminformatics", publisher = "Wiley-VCH", location = "Weinheim, Germany", year = "2003", url = "http://www.amazon.com/gp/product/3527306811/ref=bxgy_cc_text_b/104-0173826-3484711?%5Fencoding=UTF8", } @Book{Leach03, author = "A. Leach and V. Gillet", title = "An Introduction to Cheminformatics", publisher = "Springer", year = "2003", isbn = "1402013477", url = "http://www.amazon.com/gp/product/1402013477/104-0173826-3484711?v=glance&n=283155&s=books&v=glance", } @Book{Bajorath04, author = "J. 
Bajorath", title = "Chemoinformatics: Concepts, Methods, and Tools for Drug Discovery (Methods in Molecular Biology)", publisher = "Humana Press", month = "June", year = "2004", isbn = "1588292614", url = "http://www.amazon.com/gp/product/1588292614/104-0173826-3484711?v=glance&n=283155&%5Fencoding=UTF8&v=glance", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Molecular modeling books %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Book{Leach97, author = "A. Leach", title = "Molecular Modelling: Principles and Applications", publisher = "Longman Pub Group ", year = "1997", isbn = "0582239338", url = "http://www.amazon.com/gp/product/0582239338/104-0173826-3484711?v=glance&n=283155&s=books&v=glance", } @Book{Schlick02, author = "T. Schlick", title = "Molecular Modeling and Simulation", publisher = "Springer", year = "2002", isbn = "038795404X", url = "http://www.amazon.com/gp/product/038795404X/104-0173826-3484711?v=glance&n=283155&%5Fencoding=UTF8&v=glance", } @Book{Holtje03, author = "Holtje and Sippl and Rognan and Folkers", title = "Molecular Modeling: Basic Principles and Applications", publisher = "Wiley-VCH", year = "2003", isbn = "3527305890", url = "http://www.amazon.com/gp/product/3527305890/104-0173826-3484711?v=glance&n=283155&%5Fencoding=UTF8&v=glance", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structural bioinformatics overviews %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Goldsmith-Fischman03, author = "S. {Goldsmith-Fischman} and B. Honig", title = "Structural genomics: Computational methods for structure analysis", journal = "Protein Science", volume = "12", year = "2003", pages = "1813-1821", url = "http://www.proteinscience.org/cgi/content/full/12/9/1813", } @Article{Blundell00, title = "Structural genomics: an overview", author = "T.L. Blundell and K. 
Mizuguchi", journal = "Progress in Biophysics \& Molecular Biology", volume = "73", year = "2000", pages = "289-295", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Sequence databases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Apweiler04, title = "Protein sequence databases", author = "R. Apweiler and A. Bairoch and C.H. Wu", journal = "Current Opinion in Chemical Biology", volume = "8", number = "1", month = "February", year = "2004", pages = "76-80", comment = "This is the main reference for the UniProt database", } @Article{Bairoch05, author = "A. Bairoch and R. Apweiler and C.H. Wu and W.C. Barker and B. Boeckmann and S. Ferro and E. Gasteiger and H. Huang and R. Lopez and M. Magrane and M.J. Martin and D.A. Natale and C. {O'Donovan} and N. Redaschi and L.S. Yeh", title = "The Universal Protein Resource ({UniProt})", journal = "Nucleic Acids Res.", year = "2005", month = "January", volume = "33", pages = "D154-D159", comment = "This is a paper about the UniProt database", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structure databases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Berman00, author = "H.M. Berman and J. Westbrook and Z. Feng and G. Gilliland and T.N. Bhat and H. Weissig and I.N. Shindyalov and P.E. Bourne", title = "The Protein Data Bank", journal = "Nucleic Acids Research", volume = "28", year = "2000", pages = "235-242", comment = "This is the main reference for the PDB", } @Article{Berman92, author = "H.M. Berman and W.K. Olson and D.L. Beveridge and J. Westbrook and A. Gelbin and T. Demeny and S.H. Hsieh and A.R. Srinivasan and B. Schneider", title = "The Nucleic Acid Database: A Comprehensive Relational Database of Three-Dimensional Structures of Nucleic Acids", journal = "Biophys. 
J.", volume = "63", pages = "751-759", year = "1992", comment = "This is the main reference for the NDB", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structure database annotations %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Laskowski97, author = "R.A. Laskowski and E.G. Hutchinson and A.D. Michie A.C. Wallace and M.L. Jones and J.M. Thornton", title = "PDBsum: A Web-based database of summaries and analyses of all PDB structures", journal = "Trends Biochem. Sci.", volume = "22", pages = "488-490", year = "1997", comment = "This is the original paper about PDBsum, a web-based service for summarizing known information about every PDB file - very useful (http://www.ebi.ac.uk/thornton-srv/databases/pdbsum/).", } @Article{Laskowski01, author = "R.A. Laskowski", title = "PDBsum: summaries and analyses of PDB structures", journal = "Nucleic Acids Res", volume = "29", pages = "221-222", year = "2001", comment = "This is an update to PDBsum paper", } @Article{Laskowski05b, author = "R.A. Laskowski and V.V. Chistyakov and J.M. Thornton", title = "{PDBsum} more: new summaries and analyses of the known {3D} structures of proteins and nucleic acids", journal = "Nucleic Acids Res 33 Database", volume = "Issue", year = "2005", pages = "D266-268", comment = "This is another update to PDBsum paper", } @Article{Velankar05, author = "S. Velankar and P. McNeil and V. Mittard-Runte and A. Suarez and D. Barrell and R. Apweiler and K. Henrick", year = "2005", title = "E-MSD: an integrated data resource for bioinformatics", journal = "Nucleic Acids Res (Database Issue)", volume = "33", pages = "D262-D265", comment = "This is the main reference for the MSD, the macromolecular structural database (http://www.ebi.ac.uk/msd/). It provides a relational database with structures (e.g., PDB), quaternary structure predictions (PQS), classifications (e.g., SCOP, CATH, EC, etc.) 
and results of analyses (e.g., protein-ligand contacts).", } @Article{Schomburg00, author = "I. Schomburg and O. Hofmann and C. Bansch and A. Chang and D. Schomburg", title = "Enzyme data and metabolic information: {BRENDA,} a resource for research in biology, biochemistry, and medicine", journal = "Gene Funct. Dis.", year = "2000", volume = "3", number = "4", pages = "109-118", comment = "This is the original reference for BRENDA, a database with information about enzymes (http://www.brenda.uni-koeln.de/).", } @Article{Bairoch00, author = "A. Bairoch", title = "The {ENZYME} database in 2000", journal = "Nucleic Acids Res", volume = "28", pages = "304-305", year = "2000", comment = "This is the main reference for the ENZYME database, which contains information about binding sites in enzymes (contacts, cofactors, etc.) (http://www.expasy.org/enzyme/).", } @Article{Hobohm92, author = "U. Hobohm and M. Scharf and R. Schneider and C.Sander", title = "Selection of a representative set of structures from the {Brookhaven Protein Data Bank}", journal = "Protein Science", volume = "1", year = "1992", pages = "409-417", comment = "This is the original reference for PDBSelect.", } @Article{Hobohm94, author = "U. Hobohm and C. Sander", title = "Enlarged representative set of protein structures", journal = "Protein Science", volume = "3", year = "1994", pages = "522", comment = "This is an update for PDBSelect.", } @Article{Henrick98, author = "K. Henrick and J.M. Thornton", title = "{PQS:} a protein quaternary structure file server", journal = "Trends in Biochemical Sciences", volume = "23", number = "9", year = "1998", pages = "358-361", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Databases of small molecules %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Irwin05, author = "J.J. Irwin and B.K. Shoichet", title = "{ZINC} - A Free Database of Commercially Available Compounds for Virtual Screening", journal = "J. 
Chem. Inf. Model", volume = "45", number = "1", year = "2005", pages = "177-182", comment = "This is the main reference for ZINC (http://blaster.docking.org/zinc/).", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-ligand complex databases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Chalk04, author = "A.J. Chalk and C.L. Worth and J.P. Overington and A.W.E Chan", title = "{PDBLIG:} Classification of Small Molecular Protein Binding in the Protein Data Bank", journal = "J. Med. Chem.", volume = "47", number = "15", year = "2004", pages = "3807-3816", abstract = "", } @Article{Feng04, author = "Z. Feng and L. Chen and H. Maddula and O. Akcan and R. Oughtred and H.M. Berman and J. Westbrook", title = "{Ligand Depot:} a data warehouse for ligands bound to macromolecules", journal = "Bioinformatics", volume = "20", number = "13", year = "2004", pages = "2153-2155", abstract = "Ligand Depot is an integrated data resource for finding information about small molecules bound to proteins and nucleic acids. The initial release (version 1.0, November, 2003) focuses on providing chemical and structural information for small molecules found as part of the structures deposited in the Protein Data Bank. Ligand Depot accepts keyword-based queries and also provides a graphical interface for performing chemical substructure searches. A wide variety of web resources that contain information on small molecules may also be accessed through Ligand Depot. AVAILABILITY: Ligand Depot is available at http://ligand-depot.rutgers.edu/. Version 1.0 supports multiple operating systems including Windows, Unix, Linux and the Macintosh operating system. The current drawing tool works in Internet Explorer, Netscape and Mozilla on Windows, Unix and Linux.", } @Article{Puvanendrampillai03, author = "D. Puvanendrampillai and J.B. 
Mitchell", title = "Protein Ligand Database ({PLD}): additional understanding of the nature and specificity of protein-ligand complexes", journal = "Bioinformatics", volume = "19", number = "14", year = "2003", pages = "1856-1857", abstract = "The Protein Ligand Database (PLD) is a publicly available web-based database that aims to provide further understanding of protein-ligand interactions. The PLD contains biomolecular data including calculated binding energies, Tanimoto ligand similarity scores and protein percentage sequence similarities. The database has potential for application as a tool in molecular design. Availability: http://www-mitchell.ch.cam.ac.uk/pld/", } @Article{Golovin05, author = "A. Golovin and D. Dimitropoulos and T. Oldfield and A. Rachedi and K. Henrick", title = "{MSDsite:} A Database Search and Retrieval System for the Analysis and Viewing of Bound Ligands and Active Sites", journal = "PROTEINS: Structure, Function, and Bioinformatics", volume = "58", number = "1", pages = "190-199", year = "2005", } @Article{Bergner02, author = "A. Bergner and J. Gunther and M. Hendlich and G. Klebe and M. Verdonk", title = "Use of Relibase for Retrieving Complex 3D Interaction Patterns Including Crystallographic Packing Effects", journal = "Biopolymers (Nucleic Acid Sci.)", volume = "61", year = "2002", pages = "99-110", } @Article{Hendlich98, title = "Databases for Protein-Ligand Complexes", author = "M. Hendlich", journal = "Acta Crystallographica", volume = "D54", year = "1998", pages = "1178-1182", comment = "This is the main reference for Relibase, a database of protein-ligand interactions (http://relibase.ebi.ac.uk/).", } @Article{Sheu05, author = "S.H. Sheu and D.R. {Lancia, Jr}, and K.H. Clodfelter and M.R. Landon and S. 
Vajda", title = "{PRECISE:} a Database of Predicted and Consensus Interaction Sites in Enzymes", journal = "Nucleic Acids Research", year = "2005", volume = "33 (Database issue)", pages = "D206-D211", abstract = "PRECISE (Predicted and Consensus Interaction Sites in Enzymes) is a database of interactions between the amino acid residues of an enzyme and its ligands (substrate and transition state analogs, cofactors, inhibitors and products). It is available online at http://precise.bu.edu/. In the current version, all information on interactions is extracted from the enzyme-ligand complexes in the Protein Data Bank (PDB) by performing the following steps: (i) clustering homologous enzyme chains such that, in each cluster, the proteins have the same EC number and all sequences are similar; (ii) selecting a representative chain for each cluster; (iii) selecting ligand types; (iv) finding non-bonded interactions and hydrogen bonds; and (v) summing the interactions for all chains within the cluster. The output of the search is the color-coded sequence of the representative. The colors indicate the total number of interactions found at each amino acid position in all chains of the cluster. Clicking on a residue displays a detailed list of interactions for that residue. Optional filters allow restricting the output to selected chains in the cluster, to non-bonded or hydrogen bonding interactions, and to selected ligand types. 
The binding site information is essential for understanding and altering substrate specificity and for the design of enzyme inhibitors.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein structure fundamentals %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Book{Branden99, author = "Carl-Ivar Branden and John Tooze", title = "Introduction to Protein Structure", publisher = "Garland Publishing; 2nd edition", year = "1999", comment = "This is a classic book on protein structure", } @Book{Lesk01, author = "Arthur M. Lesk", title = "Introduction to Protein Architecture: The Structural Biology of Proteins", publisher = "Oxford University Press", year = "2001", comment = "This is a good book on protein structure.", } @Book{Lehninger04, title = "Lehninger Principles of Biochemistry", author = "David L. Nelson and Michael M. Cox", publisher = "W.H. Freeman; 4th edition", year = "2004", comment = "This is a classic book on biochemistry.", } @Article{Hunter93, author = "L. Hunter", title = "Molecular Biology for Computer Scientists", journal = "Artificial Intelligence and Molecular Biology", publisher = "AAAI Press", year = "1993", url = "http://www.aaai.org/Library/Books/Hunter/01-Hunter.pdf", comment = "This is a high-level review article covering all of molecular biology.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein structure characterization %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Varrazzo05, author = "D. Varrazzo and A. Bernini and O.Spiga and A. Ciutti and S. Chiellini and V. Venditti and L. Bracci and Neri Niccolai", title = "Three-dimensional computation of atom depth in complex molecular structures", journal = "Bioinformatics", year = "2005", volume = "21", number = "12", pages = "2856-2860", } @Article{Gerstein00, author = "M. Gerstein and F.M. 
Richards", title = "Protein Geometry: Volumes, Areas, and Distances", journal = "International Tables for Crystallography (Molecular Geometry and Features in Macromolecular Crystallography)", volume = "Chapter 22, Volume F", year = "2000", } @Article{Tsai99, author = "J. Tsai and R. Taylor and C. Chothia and M. Gerstein", title = "The Packing Density in Proteins: Standard Radii and Volumes", journal = "J. Mol. Biol.", volume = "290", pages = "253-266", year = "1999", } @Book{Singh92, author = "Juswinder Singh and J.M. Thornton", title = "Protein Side-Chain Interactions", publisher = "Oxford University Press", city = "Oxford, UK", year = "1992", } @Article{Sobolev99, author = "V. Sobolev and A. Sorokine and J. Prilusky and E.E. Abola and M. Edelman", title = "Automated analysis of interatomic contacts in proteins", journal = "Bioinformatics", volume = "15", number = "4", year = "1999", pages = "327-332", abstract = "MOTIVATION: New software has been designed to assist the molecular biologist in understanding the structural consequences of modifying a ligand and/or protein. RESULTS: Tools are described for the analysis of ligand-protein contacts (LPC software) and contacts of structural units (CSU software) such as helices, sheets, strands and residues. Our approach is based on a detailed analysis of interatomic contacts and interface complementarity. For any ligand or structural unit, these software automatically: (i) calculate the solvent-accessible surface of every atom; (ii) determine the contacting residues and type of interaction they undergo (hydrophobic-hydrophobic, aromatic-aromatic, etc.); (iii) indicate all putative hydrogen bonds. LPC software further predicts changes in binding strength following chemical modification of the ligand. AVAILABILITY: Both LPC and CSU can be accessed through the PDB and are integrated in the 3DB Atlas page of all PDB files. For any given file, the tools can also be accessed at http://www.pdb.bnl. 
gov/pdb-bin/lpc?PDB_ID= and http://www.pdb.bnl. gov/pdb-bin/csu?PDB_ID= with the four-letter PDB code added at the end in each case. Finally, LPC and CSU can be accessed at: http://sgedg.weizmann.ac.il/lpc and http://sgedg.weizmann.ac.il/csu.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein fold classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Murzin95, author = "A.G. Murzin and S.E. Brenner and T. Hubbard and C. Chothia", title = "{SCOP:} a structural classification of proteins database for the investigation of sequences and structures", journal = "J. Mol. Biol", volume = "247", pages = "536-540", year = "1995", comment = "This is the main reference for the SCOP hierarchy", } @Article{Andreeva04, author = "A. Andreeva and D. Howorth and S.E. Brenner and T.J.P. Hubbard and C. Chothia and A.G. Murzin", title = "{SCOP} database in 2004: refinements integrate structure and sequence family data", journal = "Nucleic Acids Research", volume = "32", year = "2004", pages = "D226-D229", comment = "This is a more recent reference for the SCOP hierarchy", } @Article{Orengo97, author = "C.A. Orengo and A.D. Michie and S. Jones and D.T. Jones and M.B. Swindells and J.M. Thornton", title = "{CATH} - A Hierarchic Classification of Protein Domain Structures", journal="Structure", volume = "5", number = "8", pages = "1093-1108", year = "1997", comment = "This is the original reference for the CATH hierarchy", url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=9309224&dopt=Citation", } @Article{Pearl05, author = "F. Pearl and A. Todd and I. Sillitoe and M. Dibley and O. Redfern and T. Lewis and C. Bennett and R. 
Marsden and et al", title = "The {CATH} Domain Structure Database and related resources {Gene3D} and {DHS} provide comprehensive domain family information for genome analysis", journal = "Nucleic Acids Research", volume = "33", year = "2005", pages = "D247-D251", comment = "This is a more recent reference for the CATH hierarchy", } @Article{Taylor02a, author = "W.R. Taylor", title = "A ``periodic table'' for protein structures", journal = "Nature", volume = "416", number = "6881", year = "2002", pages = "657-660", comment = "This paper formalizes both secondary and tertiary links to allow the rigorous and automatic definition of protein topology.", } @Article{Holm96, author = "L. Holm and C. Sander", title = "The {FSSP} database: fold classification based on structure-structure alignment of proteins", journal = "Nucleic Acids Research", volume = "24", number = "1", year = "1996", pages = "206-209", comment = "This is the main reference for the FSSP classification, which is based on DALI structural alignments.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein fold space %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Chothia92, author = "C. Chothia", year = "1992", title = "One thousand families for the molecular biologist", journal = "Nature", volume = "357", pages = "543-544", comment = "This is the classic paper in which Chothia predicted that the number of folds observed in nature is quite small compared to the number of proteins.", } @Article{Chothia86, author = "C. Chothia and A.M. Lesk", title = "The relation between the divergence of sequence and structure in proteins", journal = "The EMBO Journal", volume = "5", pages = "823-826", year = "1986", } @Article{Orengo94, author = "C.A. Orengo and D.T. Jones and J.M. Thornton", title = "Protein superfamilies and domain superfolds", journal = "Nature", volume = "372", year = "1994", pages = "631-634", } @Article{Sander91, author = "C. 
Sander and R. Schneider", title = "Database of homology-derived protein structures and the structural meaning of sequence alignment", journal = "Proteins", volume = "9", number = "1", year = "1991", pages = "56-68", } @Article{Wang96, author = "{Z-X.} Wang", title = "How many fold types of protein are there in nature?", journal = "Proteins", volume = "26", year = "1996", pages = "186-191", } @Article{Zhang97, author = "{C-T.} Zhang", title = "Relations of the numbers of protein sequences, families and folds", journal = "Protein Engineering", volume = "10", number = "7", pages = "757-761", year = "1997", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Pairwise sequence alignment %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Needleman71, author = "S.B. Needleman and C.D. Wunsch", year = "1971", title = "A general method applicable to the search for similarities in the amino acid sequence of two proteins", journal = "J. Mol. Biol.", volume = "48", pages = "443-453", comment = "This is the original paper on global sequence alignment", } @Article{Smith81, author = "T.F. Smith and M.S. Waterman", title = "Identification of common molecular subsequences", journal = "J. Mol. Biol.", volume = "147", year = "1981", pages = "195-197", comment = "This is the original paper on local sequence alignment. It provides the main reference for the Smith-Waterman alignment score.", } @Article{McGinnis04, author = "S. McGinnis and T.L. Madden", title = "{BLAST:} at the core of a powerful and diverse set of sequence analysis tools", journal = "Nucleic Acids Res", year = "2004", volume = "32", pages = "W20-W25", comment = "This is the main reference for BLAST", } @Article{Pearson90, author = "W.R. 
Pearson", title = "Rapid and sensitive sequence comparison with {FASTP} and {FASTA}", journal = "Methods Enzymol", year = "1990", volume = "183", pages = "63-98", comment = "This is the main reference for FASTA", } @Article{Altschul94, author = "S.F. Altschul and M.S. Boguski and W. Gish and J.C. Wootton", title = "Issues in searching molecular sequence databases", journal = "Nature Genetics", volume = "6", number = "2", year = "1994", pages = "119-129", comment = "This is an overview of sequence alignment issues and methods. It provides a good reference for sequence alignment methods as a whole.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Multiple sequence alignment %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Higgins96, author = "D.G. Higgins and J.D. Thompson and T.J. Gibson", title = "Using CLUSTAL for multiple sequence alignments", journal = "Methods Enzymol", volume = "266", year = "1996", pages = "383-402", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Sequence motifs %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Altshul97, author = "S.F. Altschul and T.L. Madden and A.A. Schaffer and J. Zhang and Z. Zhang and W. Miller and D.J. Lipman", title = "Gapped {BLAST} and {PSI-BLAST:} a new generation of protein database search programs", journal = "Nucleic Acids Research", volume = "25", number = "17", pages = "3389-3402", year = "1997", comment = "This is the main reference for PSI-BLAST.", } @Article{Bateman02, author = "A. Bateman and E. Birney and L. Cerruti and R. Durbin and L. Etwiller and S.R. Eddy and S. Griffiths-Jones and K.L. Howe and M. Marshall and E.L.L. Sonnhammer", title = "The {P}fam protein families database", journal = "Nucleic Acids Res", year = "2002", volume = "30", pages = "276-280", comment = "This is the main reference for Pfam.", } @Article{Soding04, author = "J. 
Soding", title = "Protein homology detection by {HMM-HMM} comparison", journal = "Bioinformatics", year = "2004", volume = "21", pages = "951-960", } @Article{Falquet02, author = "L. Falquet and M. Pagni and P. Bucher and N. Hulo and C.J. Sigrist and K. Hofmann and A. Bairoch", title = "The {PROSITE} database, its status in 2002", year = "2002", journal = "Nucleic Acids Res", volume = "30", pages = "235-238", comment = "Describes the PROSITE database, which contains HMM profiles.", } @Article{Jonassen97, author = "I Jonassen", title = "Efficient discovery of conserved patterns using a pattern graph", journal = "Comput Appl Biosci", volume = "13", year = "1997", pages = "509-522", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Pairwise structure alignment (overviews) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Brown96, author = "N. Brown and C.A. Orengo", title = "A protein structure comparison methodology", journal = "Computers Chem", volume = "20", year = "1996", pages = "359-380", comment = "This provides a nice review of structural alignment issues and methods.", } @Article{Sierk04a, author = "M.L. Sierk and and G.J. Kleywegt", title = "Deja vu all over again: finding and analyzing protein structure similarities", journal = "Structure (Camb)", volume = "12", year = "2004", pages = "2103-2111", comment = "``This article is meant to guide the structural biologist in the basics of structural alignment, and to provide an overview of the available software tools. The main purpose is to encourage users to gain some understanding of the strengths and limitations of structural alignment, and to take these factors into account when interpreting the results of different programs.''", } @Article{Sierk04b, author = "M.L. Sierk and W.R. 
Pearson", title = "Sensitivity and selectivity in protein structure comparison", journal = "Protein Science", volume = "13", year = "2004", pages = "773-785", comment = "This paper compares alignment methods with ROC curves on CATH database. ``Seven protein structure comparison methods and two sequence comparison programs were evaluated on their ability to detect either protein homologs or domains with the same topology (fold) as defined by the CATH structure database. The structure alignment programs Dali, Structal, Combinatorial Extension (CE), VAST, and Matras were tested along with SGM and PRIDE, which calculate a structural distance between two domains without aligning them. We also tested two sequence alignment programs, SSEARCH and PSI-BLAST. Depending upon the level of selectivity and error model, structure alignment programs can detect roughly twice as many homologous domains in CATH as sequence alignment programs. ... These results help quantify the statistical distinction between analogous and homologous structures, and provide a benchmark for structure comparison statistics.''", } @Article{Godzik96, author = "A. Godzik", title = "The structural alignment between two proteins: is there a unique answer?", journal = "Protein Sci", volume = "5", year = "1996", pages = "1325-1338", comment = "This paper studies ``the problem of uniqueness and stability of structural alignments with the help of visualization of the suboptimal alignments. It is shown that alignments are often degenerate and whole families of alignments can be generated with almost the same score as the optimal alignment.''" } @Article{Holm94, author = "L. Holm and C. Sander", title = "Searching protein structure databases has come of age", journal = "Proteins", volume = "19", pages = "165-173", year = "1994", } @Article{Lemmen00, author = "C. Lemmen and T. 
Lengauer", title = "Computational methods for the structural alignment of molecules", journal = "J Comput Aided Mol Des", month = "March", volume = "14", number = "3", year = "2000", pages = "215-32", comment = "This paper reviews ``the past six years of scientific publishing on molecular superposition. Our focus lies on automatic procedures to be performed on arbitrary molecular structures. Methodical aspects are our main concern here ... providing pointers to the recent literature providing important contributions to computational methods for the structural alignment of molecules. Finally we provide a perspective on how superposition methods can effectively be used for the purpose of virtual database screening.''", } @Article{Eidhammer00, author = "I. Eidhammer and I. Jonassen and W.R. Taylor", title = "Structure comparison and structure patterns", journal = "J. Comput. Biol.", volume = "7", year = "2000", pages = "658-716", comment = "``This article investigates aspects of pairwise and multiple structure comparison, and the problem of automatically discover common patterns in a set of structures. Descriptions and representation of structures and patterns are described, as well as scoring and algorithms for comparison and discovery. A framework and nomenclature is developed for classifying different methods, and many of these are reviewed and placed into this framework.''", } @Article{Kolodny04, author = "R. Kolodny and N. Linial", title = "Approximate protein structural alignment in polynomial time", journal = "PNAS", year = "2004", volume = "101", number = "33", pages = "12201-12206", comment = "Here, we study the structural alignment problem as a family of optimization problems and develop an approximate polynomial-time algorithm to solve them. 
We argue that such approximate solutions are, in fact, of greater interest than exact ones because of the noisy nature of experimentally determined protein coordinates.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Pairwise structure alignment (methods) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Holm93, author = "Lisa Holm and Chris Sander", title = "Protein Structure Comparison by Alignment of Distance Matrices", journal = "J. Mol. Biol", volume = "233", pages = "123-138", year = "1993", comment = "This is the main reference for DALI alignment algorithm", } @Article{Holm95, author = "L. Holm and and C. Sander", title = "Dali: a network tool for protein structure comparison", journal = "Trends Biochem Sci", volume = "20", year = "1995", pages = "478-480", comment = "This is a reference for DALI website (http://www.ebi.ac.uk/dali/).", } @Article{Holm00, author = "L. Holm and J. Park", title = "{DaliLite} workbench for protein structure comparison", journal = "Bioinformatics", year = "2000", volume = "16", number = "6", pages = "566-567", comment = "This is the reference for DaliLite (http://ekhidna.biocenter.helsinki.fi:9801/dali_lite/start)", } @Article{Subbiah93, author = "S. Subbiah and D.V. Laurents and M. Levitt", title = "Structural Similarity of {DNA}-binding Domains of Bacteriophage Repressors and the Globin Core", journal = "Current Biol", volume = "3", year = "1993", pages = "141-148", comment = "This is the original reference for STRUCTAL, which uses an EM algorithm that alternates between solving for the best superposition (least squares) and the best correspondences (dynamic programming).", } @Article{Gerstein98, author = "M. Gerstein and M. 
Levitt",
  year = "1998",
  title = "Comprehensive Assessment of Automatic Structural Alignment against a Manual Standard, the {SCOP} Classification of Proteins",
  journal = "Protein Science",
  volume = "7",
  pages = "445--456",
  comment = "This is the second reference for STRUCTAL",
  url = "http://bioinfo.mbb.yale.edu/~mbg/preprint/ss-prsci.pdf",
}

@Article{Krissinel04,
  author = "E. Krissinel and K. Henrick",
  title = "Secondary-structure matching ({SSM}), a new tool for fast protein structure alignment in three dimensions",
  journal = "Acta Crystallogr D Biol Crystallogr",
  year = "2004",
  volume = "D60",
  pages = "2256--2268",
  comment = "This is the main reference for SSM (http://www.ebi.ac.uk/msd-srv/ssm/), which aligns proteins structures in two phases. The first phase aligns the main alpha-helix and beta-sheet secondary structure elements. The second phase aligns the alpha-carbon atoms of residues more precisely.",
}

@Article{Harrison03,
  author = "A. Harrison and F. Pearl and I. Sillitoe and T. Slidel and R. Mott and J.M. Thornton and C. Orengo",
  title = "Recognising the fold of a protein structure",
  journal = "Bioinformatics",
  year = "2003",
  volume = "19",
  pages = "1748--1759",
  comment = "This is the main reference for GRATH",
}

% NOTE(review): Taylor89 has no pages field -- add it once confirmed against the journal record.
@Article{Taylor89,
  author = "W.R. Taylor and C.A. Orengo",
  title = "Protein Structure Alignment",
  journal = "J. Mol. Biol.",
  volume = "208",
  number = "1",
  year = "1989",
  comment = "This is the original reference for SSAP, which employs double dynamic programming.",
}

@Article{Orengo90,
  author = "C.A. Orengo and W.R. Taylor",
  title = "A Rapid Method for Protein Structure Alignment",
  journal = "J. Theor Biol",
  year = "1990",
  volume = "147",
  pages = "517--551",
}

@Article{Orengo92,
  author = "C.A. Orengo and N.P. Brown and W.R. Taylor",
  title = "Fast structure alignment for protein databank searching",
  journal = "Proteins",
  volume = "14",
  year = "1992",
  pages = "139--167",
  comment = "This describes a fast version of SSAP suitable for database searching.
It is used to build the 2nd (A) and 3rd (T) levels of the CATH hierarchy.",
}

@Article{Orengo96,
  author = "C.A. Orengo and W.R. Taylor",
  title = "{SSAP:} sequential structure alignment program for protein structure comparison",
  journal = "Methods Enzymol",
  volume = "266",
  year = "1996",
  pages = "617--635",
  comment = "This is a reference for SSAP (http://www.biochem.ucl.ac.uk/~orengo/ssap.html)",
}

@Article{Madej95,
  author = "T. Madej and J.F. Gibrat and S.H. Bryant",
  title = "Threading a database of protein cores",
  journal = "Proteins",
  year = "1995",
  volume = "23",
  pages = "356--369",
  comment = "This is the main reference for VAST",
}

@Article{Gibrat96,
  author = "J.F. Gibrat and T. Madej and S.H. Bryant",
  title = "Surprising similarities in structure comparison",
  journal = "Curr Opin Struct Biol",
  year = "1996",
  month = jun,
  volume = "6",
  number = "3",
  pages = "377--385",
  comment = "Describes results achieved with VAST",
}

@Article{Shindyalov98,
  author = "I.N. Shindyalov and P.E. Bourne",
  title = "Protein structure alignment by incremental combinatorial extension ({CE}) of the optimal path",
  journal = "Protein Eng",
  year = "1998",
  volume = "11",
  pages = "739--747",
  comment = "This is the main reference for CE (http://cl.sdsc.edu/ce.html).",
}

@Article{Zhu05,
  author = "J. Zhu and Z. Weng",
  title = "{FAST:} a novel protein structure alignment algorithm",
  journal = "Proteins",
  year = "2005",
  volume = "58",
  pages = "618--627",
  comment = "This is the main reference for FAST (http://biowulf.bu.edu/FAST/).",
}

@Article{Maiti04,
  author = "R. Maiti and G.H. Van Domselaar and H. Zhang and D.S. Wishart",
  title = "{SuperPose:} a simple server for sophisticated structural superposition",
  journal = "Nucleic Acids Res",
  volume = "32",
  number = "Web Server issue",
  year = "2004",
  pages = "W590--W594",
  comment = "This is the main reference for SuperPose (http://wishart.biology.ualberta.ca/SuperPose/).",
}

@Article{Lessel94,
  author = "U. Lessel and D.
Schomburg",
  title = "Similarities between protein {3-D} structures",
  journal = "Protein Engineering",
  year = "1994",
  volume = "7",
  number = "10",
  pages = "1175--1187",
  comment = "This is the reference for Protein3Dfit (http://biotool.uni-koeln.de:8080/3dalign_neu/cgi-bin/3daligner.py).",
}

@Article{Szustakowski00,
  author = "J.D. Szustakowski and Z. Weng",
  title = "Protein structure alignment using a genetic algorithm",
  journal = "Proteins",
  volume = "38",
  number = "4",
  pages = "428--440",
  year = "2000",
  comment = "This is the main reference for K2/K2SA (http://zlab.bu.edu/k2sa/).",
}

@Article{Chen05,
  author = "L. Chen and T. Zhou and Y. Tang",
  title = "Protein structure alignment by deterministic annealing",
  journal = "Bioinformatics",
  year = "2005",
  volume = "21",
  pages = "51--62",
}

@Article{Ilyin04,
  author = "V. A. Ilyin and A. Abyzov and C. M. Leslin",
  title = "Structural alignment of proteins by a novel {TOPOFIT} method, as a superimposition of common volumes at a topomax point",
  journal = "Protein Sci",
  volume = "13",
  number = "7",
  year = "2004",
  pages = "1865--1874",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Pairwise structure alignment (comparisons)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Kolodny05,
  author = "Rachel Kolodny and Patrice Koehl and Michael Levitt",
  title = "Comprehensive evaluation of protein structure alignment methods: scoring by geometric measures",
  journal = "J Mol Biol",
  volume = "346",
  year = "2005",
  pages = "1173--1188",
}

@Article{Novotny04,
  author = "M. Novotny and D. Madsen and G.J. Kleywegt",
  title = "Evaluation of protein fold comparison servers",
  journal = "Proteins",
  year = "2004",
  volume = "54",
  pages = "260--270",
  comment = "Watson05: The authors perform a wide-ranging evaluation of 11 publicly available fold comparison servers. They use the CATH database as a reference for their tests.
The results show that no one server provides 100\% accuracy and therefore multiple methods should be used to assess similarities to known structures.",
}

@Article{Leplae02,
  author = "R. Leplae and T.J.P. Hubbard",
  title = "{MaxBench:} evaluation of sequence and structure comparison methods",
  journal = "Bioinformatics",
  volume = "18",
  number = "3",
  year = "2002",
  pages = "494--495",
  comment = "Compares alignment methods with ROC curves on SCOP database.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Multiple structure alignment
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Russell92,
  author = "R.B. Russell and G.J. Barton",
  title = "Multiple protein sequence alignment from tertiary structure comparison: assignment of global and residue confidence levels",
  journal = "Proteins",
  volume = "14",
  pages = "309--323",
  year = "1992",
  comment = "This is the main reference for STAMP (bioinfo.ucr.edu/pise/stamp.html).",
}

@Article{Ye05,
  author = "Y. Ye and A. Godzik",
  title = "Multiple flexible structure alignment using partial order graphs",
  journal = "Bioinformatics",
  year = "2005",
  volume = "21",
  number = "10",
  pages = "2362--2369",
}

@Article{Shatsky04,
  author = "M. Shatsky and R. Nussinov and H.J. Wolfson",
  title = "A method for simultaneous alignment of multiple protein structures",
  journal = "Proteins",
  year = "2004",
  volume = "56",
  number = "1",
  pages = "143--156",
  comment = "This is the main reference for MultiProt (http://bioinfo3d.cs.tau.ac.il/MultiProt/).",
}

@Article{Dror03,
  author = "O. Dror and H. Benyamini and R. Nussinov and H.J. Wolfson",
  title = "Multiple structural alignment by secondary structures: Algorithm and applications",
  journal = "Protein Sci",
  volume = "12",
  number = "11",
  year = "2003",
  pages = "2492--2507",
}

@Article{Gud04,
  author = "C. Guda and S. Lu and E.D. Scheeff and P.E. Bourne and I.N.
Shindyalov",
  title = "{CE-MC:} a multiple protein structure alignment server",
  journal = "Nucleic Acids Res",
  year = "2004",
  volume = "32",
  pages = "W100--W103",
  comment = "This is the multiple alignment version of CE (http://cemc.sdsc.edu/).",
}

@Article{Lupyan05,
  author = "D. Lupyan and A. Leo-Macias and A.R. Ortiz",
  title = "A new progressive-iterative algorithm for multiple structure alignment",
  journal = "Bioinformatics",
  volume = "21",
  number = "15",
  year = "2005",
  pages = "3255--3263",
}

@Article{Leibowitz01,
  author = "N. Leibowitz and R. Nussinov and H.J. Wolfson",
  title = "{MUSTA} - a general, efficient, automated method for multiple structure alignment and detection of common motifs: application to proteins",
  journal = "J Comput Biol",
  year = "2001",
  volume = "8",
  number = "2",
  pages = "93--121",
}

@Article{Taylor94,
  author = "W.R. Taylor and T.P. Flores and C.A. Orengo",
  title = "Multiple protein structure alignment",
  journal = "Protein Science",
  volume = "3",
  number = "10",
  year = "1994",
  pages = "1858--1870",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representation overviews
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Campbell03,
  author = "S.J. Campbell and N.D. Gold and R.M. Jackson and D.R. Westhead",
  title = "Ligand binding functional site location, similarity and docking",
  journal = "Curr Opin Struct Biol",
  year = "2003",
  volume = "13",
  pages = "389--395",
  comment = "Overview of ways to find and compare protein-ligand binding sites",
  url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12831892&dopt=Citation",
}

@Article{Sotriffer02,
  author = "C. Sotriffer and G.
Klebe",
  title = "Identification and mapping of small-molecule binding sites in proteins: computational tools for structure-based drug design",
  journal = "Il Farmaco",
  volume = "57",
  year = "2002",
  pages = "243--251",
  abstract = "The number of protein structures is currently increasing at an impressive rate. The growing wealth of data calls for methods to efficiently exploit structural information for medicinal and pharmaceutical purposes. Given the three-dimensional (3D) structure of a validated protein target, the identification of functionally relevant binding sites and the analysis (`mapping') of these sites with respect to molecular recognition properties are important initial tasks in structure-based drug design. To address these tasks, a variety of computational tools have been developed. Approaches to identify binding pockets include geometric analyses of protein surfaces, comparisons of protein structures, similarity searches in databases of protein cavities, and docking scans to reveal areas of high ligand complementarity. In the context of binding-site analysis, powerful data mining tools help to retrieve experimental information about related protein-ligand complexes. To identify interaction hot spots, various potential functions and knowledge-based approaches are available for mapping binding regions. The results may subsequently be used to guide virtual screenings for new ligands via pharmacophore searches or docking simulations.",
}

@Article{Via00,
  author = "A. Via and F. Ferre and B. Brannetti and M.
Helmer-Citterich",
  title = "Protein surface similarities: a survey of methods to describe and compare protein surfaces",
  journal = "Cellular and Molecular Life Sciences",
  volume = "57",
  year = "2000",
  pages = "1970--1977",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site analysis
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@PhDThesis{Stockwell05,
  author = "Gareth Stockwell",
  title = "Structural Diversity of Biological Ligands and their Binding Sites in Proteins",
  institution = "European Bioinformatics Institute",
  month = aug,
  year = "2005",
}

@Article{Bartlett02,
  author = "G.J. Bartlett and C.T. Porter and N. Borkakoti and J.M. Thornton",
  title = "Analysis of catalytic residues in enzyme active sites",
  year = "2002",
  journal = "J. Mol. Biol",
  volume = "324",
  number = 1,
  pages = "105--121",
}

@Article{Puvanendrampillai03,
  author = "D. Puvanendrampillai and J. Mitchell",
  title = "{Protein Ligand Database (PLD):} additional understanding of the nature and specificity of protein-ligand complexes",
  journal = "Bioinformatics",
  year = "2003",
  volume = "19",
  number = "14",
  pages = "1856--1857",
  comment = "This is the main reference for the Protein Ligand Database (PLD).",
}

@Article{Ringe95,
  author = "D. Ringe",
  title = "What makes a binding site a binding site?",
  journal = "Curr Opin Struct Biol",
  volume = "5",
  number = "6",
  year = "1995",
  pages = "825--829",
  url = "http://www.citeulike.org/user/nickolay/article/1047020",
  abstract = "Organic probe molecules have recently been used to define hydrophobic binding sites on the surface of proteins. It appears that the presence of water on the surface of a protein plays a crucial role in the interaction between that protein and its binding site.",
}

@Article{Vajda06,
  author = "S. Vajda and F.
Guarnieri",
  title = "Characterization of protein-ligand interaction sites using experimental and computational methods",
  journal = "Curr Opin Drug Discov Devel",
  volume = "9",
  number = "3",
  year = "2006",
  pages = "354--362",
  url = "http://www.citeulike.org/user/nickolay/article/782333",
  abstract = "The ability to identify the sites of a protein that can bind with high affinity to small, drug-like compounds has been an important goal in drug design. Accurate prediction of druggable sites and the identification of small compounds binding in those sites have provided the input for fragment-based combinatorial approaches that allow for a more thorough exploration of the chemical space, and that have the potential to yield molecules that are more lead-like than those found using traditional high-throughput screening. Current progress in experimental and computational methods for identifying and characterizing druggable ligand binding sites on protein targets is reviewed herein, including a discussion of successful nuclear magnetic resonance, X-ray crystallography and tethering technologies. Classical geometric and energy-based computational methods are also discussed, with particular focus on two powerful technologies, that is, computational solvent mapping and grand canonical Monte Carlo simulations (as used by Locus Pharmaceuticals Inc). Both methods can be used to reliably identify druggable sites on proteins and to facilitate the design of novel, low-nanomolar-affinity ligands.",
}

@Article{Kelly05,
  author = "M.S. Kelly and R.L. Mancera",
  title = "A new method for estimating the importance of hydrophobic groups in the binding site of a protein",
  journal = "J Med Chem",
  volume = "48",
  number = "4",
  year = "2005",
  pages = "1069--1078",
  abstract = "Interactions between the hydrophobic regions of a binding site and those of a complementary ligand are often observed to provide the driving force for binding.
We present a new method for the analysis of hydrophobic regions in the binding site of a protein that considers not only atom type but also the nonadditive effects arising from the shape and extent of a nonpolar region. The method has been parametrized using a purpose-built genetic algorithm to optimize its ability to identify those regions that are more likely to form a strong interaction with a nonpolar ligand group. We demonstrate the ability of this method to account for changes in the shape and extent of the exposed nonpolar surface, using both artificial and protein examples. The method is also able to rationalize differences in binding affinity for ligand-protein complexes with largely hydrophobic binding sites.",
}

@Article{Lian94,
  author = "L.Y. Lian and I.L. Barsukov and M.J. Sutcliffe and K.H. Sze and G.C. Roberts",
  title = "Protein-ligand interactions: exchange processes and determination of ligand conformation and protein-ligand contacts",
  journal = "Methods Enzymol",
  volume = "239",
  year = "1994",
  pages = "657--700",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from geometry
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Weisel07,
  author = "M. Weisel and E. Proschak and G. Schneider",
  title = "{PocketPicker:} analysis of ligand binding-sites with shape descriptors",
  journal = "Chemistry Central Journal",
  volume = "1",
  number = "7",
  year = "2007",
  url = "http://journal.chemistrycentral.com/content/1/1/7",
}

@Article{Brady00,
  author = "G.P. {Brady, Jr.} and P.F.W.
Stouten",
  title = "Fast prediction and visualization of protein binding pockets with {PASS}",
  journal = "Journal of Computer-Aided Molecular Design",
  volume = "14",
  number = "4",
  month = may,
  year = "2000",
  pages = "383--401",
  comment = "This is the main reference for PASS, a system for detecting pockets in proteins that successively constructs layers of points starting at the surface of the protein and working towards the middle of voids. Points are rejected if they are ``too'' solvent accessible, thus leaving points only inside pockets.",
}

@Article{Peters96,
  author = "K.P. Peters and J. Fauck and C. Frommel",
  title = "The automatic search for ligand binding sites in proteins of known three-dimensional structure using only geometric criteria",
  journal = "J Mol Biol",
  volume = "256",
  year = "1996",
  pages = "201--213",
  comment = "This is the main reference for APROPOS, a system for detecting protein pockets with alpha shapes.",
}

@Article{Hendlich97,
  author = "M. Hendlich and F. Rippman and G. Barnickel",
  title = "{LIGSITE:} automatic and efficient detection of potential small molecule-binding sites in proteins",
  journal = "J. Mol. Graph.",
  volume = "15",
  year = "1997",
  pages = "359--363",
  comment = "This paper is the main reference for LIGSITE, a method to detect binding site pockets. Following POCKET [Levitt], it fills a grid with values representing the number of angles from which every point is visible to the outside (sampling only 7 angles), thereby providing a measure of how deeply a point is embedded in a concave pocket.",
}

@Article{Levitt92,
  author = "D. Levitt and L. Banaszak",
  title = "POCKET: A computer graphics method for identifying and displaying protein cavities and their surrounding amino acids",
  journal = "J. Mol.
Graphics",
  volume = "10",
  year = "1992",
  pages = "229--234",
  comment = "This is the main reference for POCKET, a system for identifying free-space points deeply buried in pockets by counting the number of axial directions for which the point is occluded from both directions. This method is followed-up by LIGSITE, which considers more than just 3 axial directions.",
}

@Article{Nayal06,
  author = "M. Nayal and B. Honig",
  title = "On the nature of cavities on protein surfaces: Application to the identification of drug-binding sites",
  journal = "Proteins: Structure, Function, and Bioinformatics",
  volume = "63",
  number = "4",
  year = "2006",
  pages = "892--906",
  url = "http://www3.interscience.wiley.com/cgi-bin/abstract/112411389/ABSTRACT?CRETRY=1&SRETRY=0",
  abstract = "In this article we introduce a new method for the identification and the accurate characterization of protein surface cavities. The method is encoded in the program SCREEN (Surface Cavity REcognition and EvaluatioN). As a first test of the utility of our approach we used SCREEN to locate and analyze the surface cavities of a nonredundant set of 99 proteins cocrystallized with drugs. We find that this set of proteins has on average about 14 distinct cavities per protein. In all cases, a drug is bound at one (and sometimes more than one) of these cavities. Using cavity size alone as a criterion for predicting drug-binding sites yields a high balanced error rate of 15.7\%, with only 71.7\% coverage. Here we characterize each surface cavity by computing a comprehensive set of 408 physicochemical, structural, and geometric attributes. By applying modern machine learning techniques (Random Forests) we were able to develop a classifier that can identify drug-binding cavities with a balanced error rate of 7.2\% and coverage of 88.9\%. Only 18 of the 408 cavity attributes had a statistically significant role in the prediction.
Of these 18 important attributes, almost all involved size and shape rather than physicochemical properties of the surface cavity. The implications of these results are discussed. A SCREEN Web server is available at http://interface.bioc.columbia.edu/screen.",
}

@Article{Ho90,
  author = "C.M.W. Ho and G.R. Marshall",
  title = "{Cavity Search:} an algorithm for the isolation and display of cavity-like binding regions",
  journal = "J Comput-Aided Mol Des",
  volume = "4",
  year = "1990",
  pages = "337--354",
  comment = "This is the main reference for Cavity Search.",
}

@Article{Coleman06,
  author = "R.G. Coleman and K.A. Sharp",
  title = "{Travel depth,} a new shape descriptor for macromolecules: application to ligand binding",
  journal = "J Mol Biol",
  volume = "362",
  year = "2006",
  pages = "441--458",
  comment = "This is the main reference for Travel depth.",
}

@Article{Kim06,
  author = "D. Kim and C. Cho and D. Kim and Y. Cho",
  title = "Recognition of docking sites on a protein using [beta]-shape based on {Voronoi} diagram of atoms",
  journal = "Computer-Aided Design",
  volume = "38",
  number = "5",
  month = may,
  year = "2006",
  pages = "431--443",
  url = "http://www.citeulike.org/user/nickolay/article/1049127",
  abstract = "A protein consists of atoms. Given a protein, the automatic recognition of depressed regions on the surface of the protein, often called docking sites or pockets, is important for the analysis of interaction between a protein and a ligand and facilitates fast development of new drugs. Presented in this paper is a geometric approach for the detection of docking sites using [beta]-shape which is based on the Voronoi diagram for atoms in Euclidean distance metric. We first propose a geometric construct called a [beta]-shape which represents the proximity among atoms on the surface of a protein.
Then, using the [beta]-shape, which takes the size differences among different atoms into account, we present an algorithm to extract the pockets for the possible docking site on the surface of a protein.",
}

% NOTE(review): Frommel96 has the same title, journal, volume, year and pages as Peters96
% (authors listed in a different order); it looks like a duplicate. Kept so existing
% citations of this key keep resolving -- confirm and merge.
@Article{Frommel96,
  author = "C. Frommel and K.P. Peters and J. Fauck",
  title = "The automatic search for ligand binding sites in proteins of known three-dimensional structure using only geometric criteria",
  journal = "J. Mol. Biol.",
  volume = "256",
  year = "1996",
  pages = "201--213",
}

@Article{Pettit99,
  author = "F.K. Pettit and J.U. Bowie",
  title = "Protein surface roughness and small molecular binding sites",
  journal = "J. Mol. Biol.",
  volume = "285",
  year = "1999",
  pages = "1377--1382",
}

@Article{Laskowski96a,
  author = "R.A. Laskowski and N.M. Luscombe and M.B. Swindells and J.M. Thornton",
  title = "Protein clefts in molecular recognition and function",
  year = "1996",
  journal = "Prot. Sci",
  volume = "5",
  number = "12",
  pages = "2438--2452",
  comment = "This paper analyzes the properties of binding sites predicted with Surfnet",
}

@Article{Laskowski95,
  author = "R.A. Laskowski",
  title = "Surfnet: a program for visualizing molecular surfaces, cavities, and intermolecular interactions",
  journal = "J Mol Graph",
  year = "1995",
  volume = "13",
  pages = "323--330",
  comment = "This is the main reference for Surfnet, a program that detects binding site pockets by constructing spheres whose diameters are chords between solvent accessible residues of the protein - spheres are rejected if the center of the chord lies with a certain distance (4 angstroms) of the protein surface or if the chord is more than a certain length (10 angstroms). The pocket is predicted to be the volume covered by the union of the spheres.",
  url = "http://www.biochem.ucl.ac.uk/~roman/surfnet/surfnet.html",
}

@Article{Masuya95,
  author = "M. Masuya and J.
Doi",
  title = "Detection and geometric modeling of molecular surfaces and cavities using digital mathematical morphological operations",
  journal = "J Mol Graph",
  volume = "13",
  year = "1995",
  pages = "331--336",
  comment = "Uses mathematical morphology operations (erode, dilate, close) to detect cavities in a protein surface as the difference between the closure of the protein surface using a certain radius and the molecule itself. The method is demonstrated for two proteins.",
}

@Article{DelCarpio93,
  author = "C.A. {Del Carpio} and Y. Takahashi and S. Sasaki",
  title = "A New Approach to the Automatic Identification of Candidates for Ligand Receptor Sites in Proteins: {(I)} Search for Pocket Regions",
  journal = "J. Mol. Graph.",
  volume = "11",
  year = "1993",
  pages = "23--29",
}

@Article{Chang04,
  author = "D.T. Chang and C.Y. Chen and W.C. Chung and Y.J. Oyang and H.F. Juan and H.C. Huang",
  title = "{ProteMiner-SSM:} a web server for efficient analysis of similar protein tertiary substructures",
  journal = "Nucleic Acids Res",
  volume = "32",
  year = "2004",
  pages = "W76--W82",
  comment = "Uses probability distributions derived from splats at atoms to detect binding sites.",
}

@Article{Halperin03,
  author = "I. Halperin and H. Wolfson and R. Nussinov",
  title = "{SiteLight:} Binding-site prediction using phage display libraries",
  journal = "Protein Science",
  volume = "12",
  year = "2003",
  pages = "1344--1359",
  abstract = "Phage display enables the presentation of a large number of peptides on the surface of phage particles. Such libraries can be tested for binding to target molecules of interest by means of affinity selection. Here we present SiteLight, a novel computational tool for binding site prediction using phage display libraries. SiteLight is an algorithm that maps the 1D peptide library onto a three-dimensional (3D) protein surface. It is applicable to complexes made up of a protein Template and any type of molecule termed Target.
Given the three-dimensional structure of a Template and a collection of sequences derived from biopanning against the Target, the Template interaction site with the Target is predicted. We have created a large diverse data set for assessing the ability of SiteLight to correctly predict binding sites. SiteLight predictive mapping enables discrimination between the binding and nonbinding parts of the surface. This prediction can be used to effectively reduce the surface by 75\% without excluding the binding site. In 63\% of the cases we have tested, there is at least one binding site prediction that overlaps the interface by at least 50\%. These results suggest the applicability of phage display libraries for automated binding site prediction on three-dimensional structures. For most effective binding site prediction we propose using a random phage display library twice, to scan both binding partners of a given complex. The derived peptides are mapped to the other binding partner (now used as a Template). Here, the surface of each partner is reduced by 75\%, focusing their relative positions with respect to each other significantly. Such information can be utilized to improve docking algorithms and scoring functions.",
}

@Article{BenShimon05,
  author = "A. {Ben-Shimon} and M. Eisenstein",
  title = "Looking at Enzymes from the Inside out: The Proximity of Catalytic Residues to the Molecular Centroid can be used for Detection of Active Sites and Enzyme-Ligand Interfaces",
  journal = "J. Mol. Biol.",
  volume = "351",
  year = "2005",
  pages = "309--326",
  abstract = "Analysis of the distances of the exposed residues in 175 enzymes from the centroids of the molecules indicates that catalytic residues are very often found among the 5\% of residues closest to the enzyme centroid. This property of catalytic residues is implemented in a new prediction algorithm (named EnSite) for locating the active sites of enzymes and in a new scheme for re-ranking enzyme--ligand docking solutions.
EnSite examines only 5\% of the molecular surface (represented by surface dots) that is closest to the centroid, identifying continuous surface segments and ranking them by their area size. EnSite ranks the correct prediction 1--4 in 97\% of the cases in a dataset of 65 monomeric enzymes (rank 1 for 89\% of the cases) and in 86\% of the cases in a dataset of 176 monomeric and multimeric enzymes from all six top-level enzyme classifications (rank 1 in 74\% of the cases). Importantly, identification of buried or flat active sites is straightforward because EnSite ``looks'' at the molecular surface from the inside out. Detailed examination of the results indicates that the proximity of the catalytic residues to the centroid is a property of the functional unit, defined as the assembly of domains or chains that form the active site (in most cases the functional unit corresponds to a single whole polypeptide chain). Using the functional unit in the prediction further improves the results. The new property of active sites is also used for re-evaluating enzyme-inhibitor unbound docking results. Sorting the docking solutions by the distance of the interface to the centroid of the enzyme improves remarkably the ranks of nearly correct solutions compared to ranks based on geometric-electrostatic-hydrophobic complementarity scores.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from conservation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% NOTE(review): Pils06 carries year 2005 and has no pages/article-number field --
% confirm both against the publisher record.
@Article{Pils06,
  author = "B. Pils and R.R. Copley and J. Schultz",
  title = "Variation in structural location and amino acid conservation of functional sites in protein domain families",
  journal = "{BMC} Bioinformatics",
  volume = "6",
  year = "2005",
  abstract = "BACKGROUND: The functional sites of a protein present important information for determining its cellular function and are fundamental in drug design.
Accordingly, accurate methods for the prediction of functional sites are of immense value. Most available methods are based on a set of homologous sequences and structural or evolutionary information, and assume that functional sites are more conserved than the average. In the analysis presented here, we have investigated the conservation of location and type of amino acids at functional sites, and compared the behaviour of functional sites between different protein domains. RESULTS: Functional sites were extracted from experimentally determined structural complexes from the Protein Data Bank harbouring a conserved protein domain from the SMART database. In general, functional (i.e. interacting) sites whose location is more highly conserved are also more conserved in their type of amino acid. However, even highly conserved functional sites can present a wide spectrum of amino acids. The degree of conservation strongly depends on the function of the protein domain and ranges from highly conserved in location and amino acid to very variable. Differentiation by binding partner shows that ion binding sites tend to be more conserved than functional sites binding peptides or nucleotides. CONCLUSION: The results gained by this analysis will help improve the accuracy of functional site prediction and facilitate the characterization of unknown protein sequences.",
}

@Article{Cheng05,
  author = "G. Cheng and B. Qian and R. Samudrala and D. Baker",
  title = "Improvement in protein functional site prediction by distinguishing structural and functional constraints on protein family evolution using computational design",
  journal = "Nucleic Acids Res",
  volume = "33",
  number = "18",
  year = "2005",
  pages = "5861--5867",
  abstract = "The prediction of functional sites in newly solved protein structures is a challenge for computational structural biology. Most methods for approaching this problem use evolutionary conservation as the primary indicator of the location of functional sites.
However, sequence conservation reflects not only evolutionary selection at functional sites to maintain protein function, but also selection throughout the protein to maintain the stability of the folded state. To disentangle sequence conservation due to protein functional constraints from sequence conservation due to protein structural constraints, we use all atom computational protein design methodology to predict sequence profiles expected under solely structural constraints, and to compute the free energy difference between the naturally occurring amino acid and the lowest free energy amino acid at each position. We show that functional sites are more likely than non-functional sites to have computed sequence profiles which differ significantly from the naturally occurring sequence profiles and to have residues with sub-optimal free energies, and that incorporation of these two measures improves sequence based prediction of protein functional sites. The combined sequence and structure based functional site prediction method has been implemented in a publicly available web server.",
}

@Article{Huang06,
  author = "B. Huang and M. Schroeder",
  title = "{LIGSITEcsc:} predicting ligand binding sites using the {Connolly} surface and degree of conservation",
  journal = "{BMC} Struct Biol",
  volume = "6",
  year = "2006",
  pages = "19--29",
  software = "http://scoppi.biotec.tu-dresden.de/pocket/download.html",
}

@Article{Glaser06,
  author = "F. Glaser and R. Morris and R. Najmanovich and R. Laskowski and J. Thornton",
  title = "A method for localizing ligand binding pockets in protein structures",
  journal = "Proteins",
  volume = "62",
  year = "2006",
  pages = "479--488",
}

@Article{Nimrod05,
  author = "G. Nimrod and F. Glaser and D. Steinberg and N. {Ben-Tal} and T. Pupko",
  title = "In silico identification of functional regions in proteins",
  journal = "Bioinformatics",
  volume = "21 Suppl.",
  year = "2005",
  pages = "i328--i337",
}

@Article{Chelliah04,
  author = "V. Chelliah and L.
Chen and T. Blundell and S. Lovell",
  title = "Distinguishing structural and functional restraints in evolution in order to identify interaction sites",
  journal = "J Mol Biol",
  year = "2004",
  volume = "342",
  pages = "1487-1504",
  comment = "Watson05: This method distinguishes residues conserved for functional reasons from those that are highly conserved because they are constrained by the structure. By comparing the observed sequence conservation with the predicted conservation (based on amino acid type and environmental constraints), the authors construct environment-specific substitution tables for use in identifying functionally conserved residues",
}

@Article{Innis04,
  author = "C.A. Innis and A.P. Anand and R. Sowdhamini",
  title = "Prediction of functional sites in proteins using conserved functional group analysis",
  journal = "J Mol Biol",
  year = "2004",
  volume = "337",
  pages = "1053-1068",
  comment = "Watson05: This new method describes the conservation of a protein-surface using chemical groups rather than the amino acids. A multiple sequence alignment is used to identify conserved functional group clusters, the size of which is determined by the number of proteins contributing to it. These are mapped onto the surface to identify active sites",
}

@Article{Lichtarge03,
  author = "O. Lichtarge and H. Yao and D.M. Kristensen and S. Madabushi and I. Mihalek",
  title = "Accurate and scalable identification of functional sites by evolutionary tracing",
  journal = "J Struct Funct Genomics",
  volume = "4",
  year = "2003",
  pages = "159-166",
}

@Article{Lichtarge02,
  author = "O. Lichtarge and M.E. Sowa",
  title = "Evolutionary predictions of binding surfaces and interactions",
  journal = "Curr Opin Struct Biol",
  volume = "12",
  year = "2002",
  pages = "21-27",
}

@Article{Joachimiak02,
  author = "M.P. Joachimiak and F.E.
Cohen",
  title = "{JEvTrace}: refinement and variations of the evolutionary trace in {JAVA}",
  journal = "Genome Biol",
  year = "2002",
  volume = "3",
  pages = "RESEARCH0077",
}

@Article{DelSolMesa03,
  author = "A. {Del Sol Mesa} and F. Pazos and A. Valencia",
  title = "Automatic methods for predicting functionally important residues",
  journal = "J Mol Biol",
  year = "2003",
  volume = "326",
  pages = "1289-1302",
  url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12589769&dopt=Citation",
}

@Article{Yao03,
  author = "H. Yao and D.M. Kristensen and I. Mihalek and M.E. Sowa and C. Shaw and M. Kimmel and L. Kavraki and O. Lichtarge",
  title = "An accurate, sensitive and scalable method to identify functional sites in protein structures",
  journal = "J Mol Biol",
  year = "2003",
  volume = "334",
  pages = "387-401",
}

@Article{Sjolander04,
  author = "K. Sjolander",
  title = "Phylogenomic inference of protein molecular function: advances and challenges",
  journal = "Bioinformatics",
  year = "2004",
  volume = "20",
  pages = "170-179",
}

@Article{La05,
  author = "D. La and B. Sutch and D.R. Livesay",
  title = "Predicting protein functional sites with phylogenetic motifs",
  journal = "Proteins",
  year = "2005",
  volume = "58",
  pages = "309-320",
}

@Article{Abhiman05,
  author = "S. Abhiman and E.L.L. Sonnhammer",
  title = "{FunShift}: a database of function shift analysis on protein subfamilies",
  journal = "Nucleic Acids Res",
  year = "2005",
  volume = "33",
  pages = "D197-D200",
}

@Article{Zhang99,
  author = "B. Zhang and L. Rychlewski and K. Pawlowski and J.S. Fetrow and J. Skolnick and A.
Godzik",
  title = "From fold predictions to function predictions: automation of functional site conservation analysis for functional genome predictions",
  journal = "Protein Sci",
  month = "May",
  volume = "8",
  number = "5",
  year = "1999",
  pages = "1104-1115",
  comment = "SITE/Site Match",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from probe energetics
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Laurie05,
  author = "A.T.R. Laurie and R.M. Jackson",
  title = "{Q-SiteFinder}: an energy-based method for the prediction of protein-ligand binding sites",
  journal = "Bioinformatics",
  year = "2005",
  note = "in press",
  comment = "Predicts binding site location by filling grid with van der Waals interaction potentials for a methyl probe. Evaluated with data set of 134 proteins.",
}

@Article{An05,
  author = "J. An and M. Totrov and R. Abagyan",
  title = "Pocketome via comprehensive identification and classification of ligand binding envelopes",
  journal = "Mol Cell Proteomics",
  volume = "4",
  number = "6",
  month = "Jun",
  year = "2005",
  pages = "752-761",
  comment = "This is a reference for PocketFinder, a system for detecting binding site pockets by filling a grid with values representing a van der Waals force field (according to Lennard-Jones formula). A suitable threshold is chosen, and the grid points with value above the threshold are considered ``inside'' the binding pocket. The paper contains an evaluation of the method for a large number of PDB files, both with and without bound ligands.",
}

@Article{An04,
  author = "J. An and M. Totrov and R.
Abagyan",
  title = "Comprehensive Identification of ``Druggable'' Protein Ligand Binding Sites",
  journal = "Genome Informatics",
  volume = "15",
  number = "2",
  year = "2004",
  pages = "31-41",
  comment = "This is very similar to [An05]",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from electrostatic potential
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Bate04,
  author = "P. Bate and J. Warwicker",
  title = "Enzyme/non-enzyme discrimination and prediction of enzyme active site location using charge-based methods",
  journal = "J Mol Biol",
  year = "2004",
  volume = "340",
  number = "2",
  pages = "263-276",
}

@Article{Shanahan04,
  author = "H.P. Shanahan and M.A. Garcia and S. Jones and J.M. Thornton",
  title = "Identifying {DNA}-binding proteins using structural motifs and the electrostatic potential",
  journal = "Nucleic Acids Res",
  year = "2004",
  volume = "32",
  pages = "4732-4741",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from residue instability
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Elcock01,
  author = "A.H. Elcock",
  title = "Prediction of functionally important residues based solely on the computed energetics of protein structure",
  journal = "J. Mol. Biol.",
  volume = "312",
  number = "4",
  pages = "885-896",
  year = "2001",
  comment = "Uses instability of residues to predict which ones are in active binding site - among conserved residues, predicts that stable ones are in core, and instable ones are in active site.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from residue packing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Amitai04,
  author = "G. Amitai and A. Shemesh and E. Sitbon and M. Shklar and D. Netanely and I. Venger and S.
Pietrokovski",
  title = "Network analysis of protein structures identifies functional residues",
  journal = "J Mol Biol",
  year = "2004",
  volume = "344",
  pages = "1135-1146",
  comment = "``We transformed protein structures into residue interaction graphs (RIGs), where amino acid residues are graph nodes and their interactions with each other are the graph edges. We found that active site, ligand-binding and evolutionary conserved residues, typically have high closeness values. Residues with high closeness values interact directly or by a few intermediates with all other residues of the protein. Combining closeness and surface accessibility identified active site residues in 70\% of 178 representative structures.''",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from microscopic titration curves
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Ko05,
  author = "J. Ko and J.L.F. Murga and Y. Wei and M.J. Ondrechen",
  title = "Prediction of active sites for protein structures from computed chemical properties",
  journal = "Bioinformatics",
  volume = "21",
  number = "1",
  pages = "i258-i265",
  year = "2005",
  comment = "Uses microscopic titration curves to detect functional residues",
}

@Article{Ondrechen01,
  author = "M.J. Ondrechen and J.G. Clifton and D. Ringe",
  title = "Thematics: a simple computational predictor of enzyme function from structure",
  journal = "Proc. Natl Acad. Sci.",
  volume = "98",
  year = "2001",
  pages = "12473-12478",
}

@Article{Ringe04,
  author = "D. Ringe and Y. Wei and K.R. Boino and M.J.
Ondrechen",
  title = "Protein structure to function: insights from computation",
  journal = "Cell Mol Life Sci",
  year = "2004",
  volume = "61",
  pages = "387-392",
  comment = "Finds binding sites using theoretical microscopic titration curves",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site detection from docking analyses
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Silberstein03,
  author = "Michael Silberstein and Sheldon Dennis and Lawrence {Brown III} and Tamas Kortvelyesi and Karl Clodfelter and Sandor Vajda",
  title = "Identification of Substrate Binding Sites in Enzymes by Computational Solvent Mapping",
  journal = "J. Mol. Biol.",
  volume = "332",
  year = "2003",
  pages = "1095-1113",
  comment = "Docks many small molecule fragments and then predicts that active residues are the ones closest to the docked positions of the fragments",
}

@Article{Dennis02,
  author = "Sheldon Dennis and Tamas Kortvelyesi and Sandor Vajda",
  title = "Computational mapping identifies the binding sites of organic solvents on proteins",
  journal = "PNAS",
  month = "April",
  year = "2002",
  volume = "99",
  number = "7",
  pages = "4290-4295",
  abstract = "Computational mapping places molecular probes---small molecules or functional groups---on a protein surface to identify the most favorable binding positions. Although x-ray crystallography and NMR show that organic solvents bind to a limited number of sites on a protein, current mapping methods result in hundreds of energy minima and do not reveal why some sites bind molecules with different sizes and polarities. We describe a mapping algorithm that explains the origin of this phenomenon. The algorithm has been applied to hen egg-white lysozyme and to thermolysin, interacting with eight and four different ligands, respectively.
In both cases the search finds the consensus site to which all molecules bind, whereas other positions that bind only certain ligands are not necessarily found. The consensus sites are pockets of the active site, lined with partially exposed hydrophobic residues and with a number of polar residues toward the edge. These sites can accommodate each ligand in a number of rotational states, some with a hydrogen bond to one of the nearby donor/acceptor groups. Specific substrates and/or inhibitors of hen egg-white lysozyme and thermolysin interact with the same side chains identified by the mapping, but form several hydrogen bonds and bind in unique orientations.",
}

@Article{Bliznyuk99,
  author = "A. Bliznyuk and J. Gready",
  title = "Simple method for locating possible ligand binding sites on protein surfaces",
  journal = "J. Comput. Chem.",
  volume = "9",
  year = "1999",
  pages = "983-988",
  comment = "Uses FFT to dock rigid ligand using a simple shape correlation function in order to find the correct binding site, which will later be analyzed by more detailed (energetic) docking methods.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site analysis and prediction from multiple properties
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Guo05,
  author = "T. Guo and Y. Shi and Z. Sun",
  title = "A novel statistical ligand-binding site predictor: application to {ATP}-binding sites",
  journal = "Protein Engineering, Design and Selection",
  volume = "18",
  number = "2",
  year = "2005",
  pages = "65-70",
}

@Article{Rossi06,
  author = "A. Rossi and M.A. {Marti-Renom} and A.
Sali",
  title = "Localization of binding sites in protein structures by optimization of a composite scoring function",
  journal = "Protein Sci",
  volume = "15",
  number = "10",
  year = "2006",
  pages = "2366-2380",
  url = "http://www.citeulike.org/user/nickolay/article/1049131",
  abstract = "The rise in the number of functionally uncharacterized protein structures is increasing the demand for structure-based methods for functional annotation. Here, we describe a method for predicting the location of a binding site of a given type on a target protein structure. The method begins by constructing a scoring function, followed by a Monte Carlo optimization, to find a good scoring patch on the protein surface. The scoring function is a weighted linear combination of the z-scores of various properties of protein structure and sequence, including amino acid residue conservation, compactness, protrusion, convexity, rigidity, hydrophobicity, and charge density; the weights are calculated from a set of previously identified instances of the binding-site type on known protein structures. The scoring function can easily incorporate different types of information useful in localization, thus increasing the applicability and accuracy of the approach. To test the method, 1008 known protein structures were split into 20 different groups according to the type of the bound ligand. For nonsugar ligands, such as various nucleotides, binding sites were correctly identified in 55\%-73\% of the cases. The method is completely automated (http://salilab.org/patcher) and can be applied on a large scale in a structural genomics setting.",
}

@Article{Zvelebil88,
  author = "M.J.J.M. Zvelebil and M.J.E.
Sternberg",
  title = "Analysis and prediction of the location of catalytic residues in enzymes",
  journal = "Protein Engineering",
  volume = "2",
  number = "2",
  year = "1988",
  pages = "127-138",
  abstract = "The catalytic residues of an enzyme are defined as the amino acids directly involved in chemical catalysis. They mainly act as a general acid-base, electrophilic or nucleophilic catalyst or they polarize and stabilize the transition state. An analysis of the structural features of 36 catalytic residues in 17 enzymes of known structure and with defined mechanism is reported. Residues that bind metal ions (Zn2 and Cu2) are considered separately. The features examined are: residue type, location in secondary structure, separation between the residues, accessibility to solvent, intra-protein electrostatic interactions, mobility as evaluated from crystallographic temperature factors, polarity of the environment and the sequence conservation between homologous enzymes of residues that were sequentially or spatially close to the catalytic residue. In general the environment of catalytic residues is similar to that of polar side chains that have low accessibility to solvent. Two algorithms have been developed to identify probable catalytic residues. Scanning an alignment of homologous enzyme sequences for peaks of sequence conservation identifies 13 out of the 16 catalytic residues with 50 residues overpredicted. When the conservation of the spatially close residues is used instead, a different set of 13 residues are identified with 47 residues overpredicted. A combination of the two algorithms identifies 11 residues with 36 residues overpredicted.",
}

@Article{Huan-Xiang01,
  author = "H.-X. Zhou and Y. Shan",
  title = "Prediction of protein interaction sites from sequence profile and residue neighbor list",
  journal = "Proteins",
  volume = "44",
  year = "2001",
  pages = "336-343",
}

@InProceedings{Cilia07,
  author = "Elisa Cilia",
  title = "Protein Active Site Detection using {SVMs} and Kernel Methods",
  booktitle = "Contribution to the Learning and Intelligent Optimization Workshop ({LION})",
  location = "Andalo ({TN}), Italy",
  year = "2007",
  url = "http://dit.unitn.it/~cilia/publications/LION2007.pdf",
  comment = "This looks like a report to the author's research group",
}

@InProceedings{Cilia06,
  author = "Elisa Cilia and Alessandro Moschitti and Sergio Ammendola and Roberto Basili",
  title = "Structured Kernels for Automatic Detection of Protein Active Sites",
  booktitle = "Mining and Learning with Graphs Workshop ({MLG})",
  year = "2006",
  url = "http://www.inf.uni-konstanz.de/mlg2006/11.pdf",
  abstract = "In this paper, we design novel models based on Support Vector Machines and Kernel Methods for the automatic protein active site classification. We devise innovative attribute-value and tree substructure representations derived from biological and spatial information of proteins. We experimented such models with the Protein Data Bank adequately pre-processed to make explicit the active site information. Our results show that structural kernels used in combination with polynomial kernels can be effectively applied to discriminate an active site from other regions of a protein. Such finding is very important since it firstly shows the successful identification of catalytic sites of a very large family of catalytic proteins belonging to a broad classes of enzymes.",
}

@Article{Petrova06,
  author = "N.V. Petrova and C.H. Wu",
  title = "Prediction of catalytic residues using support vector machine with selected protein sequence and structural properties",
  journal = "{BMC} Bioinformatics",
  volume = "7",
  year = "2006",
  pages = "312-324",
}

@Article{Keil04,
  author = "M.
Keil and T.E. Exner and J. Brickmann",
  title = "Pattern recognition strategies for molecular surfaces: {III.} Binding site prediction with a neural network",
  journal = "J Comput Chem",
  volume = "25",
  number = "6",
  year = "2004",
  pages = "779-789",
  url = "http://www.citeulike.org/user/nickolay/article/927831",
  abstract = "An algorithm for the identification of possible binding sites of biomolecules, which are represented as regions of the molecular surface, is introduced. The algorithm is based on the segmentation of the molecular surface into overlapping patches as described in the first article of this series.1 The properties of these patches (calculated on the basis of physical and chemical properties) are used for the analysis of the molecular surfaces of 7821 proteins and protein complexes. Special attention is drawn to known protein binding sites. A binding site identification algorithm is realized on the basis of the calculated data using a neural network strategy. The neural network is able to classify surface patches as protein-protein, protein-DNA, protein-ligand, or nonbinding sites. To show the capability of the algorithm, results of the surface analysis and the predictions are presented and discussed with representative examples.",
}

@Article{Gutteridge03,
  author = "A. Gutteridge and G.J. Bartlett and J.M. Thornton",
  title = "Using a neural network and spatial clustering to predict the location of active sites in enzymes",
  journal = "J Mol Biol",
  year = "2003",
  volume = "330",
  pages = "719-734",
}

@Article{Bradford04,
  author = "J.R. Bradford and D.R. Westhead",
  title = "Improved prediction of protein-protein binding sites using a support vector machines approach",
  journal = "Bioinformatics",
  year = "2004",
}

@Article{Chen04,
  author = "S-C. Chen and I.
Bahar",
  title = "Mining frequent patterns in protein structures: a study of protease families",
  journal = "Bioinformatics",
  volume = "20",
  number = "1",
  year = "2004",
  pages = "i1-i9",
}

@Article{Alexandrov94,
  author = "N.N. Alexandrov and N. Go",
  title = "Biological meaning, statistical significance, and classification of local spatial similarities in nonhomologous proteins",
  journal = "Protein Sci",
  volume = "3",
  year = "1994",
  pages = "866-875",
}

@Article{Bagley95a,
  author = "S.C. Bagley and R.B. Altman",
  title = "Characterizing the microenvironment surrounding protein sites",
  journal = "Protein Sci",
  year = "1995",
  volume = "4",
  number = "4",
  pages = "622-635",
  comment = "This is the main reference for Feature. ``Sites are microenvironments within a biomolecular structure, distinguished by their structural or functional role. A site can be defined by a three-dimensional location and a local neighborhood around this location in which the structure or function exists. We have developed a computer system to facilitate structural analysis (both qualitative and quantitative) of biomolecular sites. Our system automatically examines the spatial distributions of biophysical and biochemical properties, and reports those regions within a site where the distribution of these properties differs significantly from control nonsites. The properties range from simple atom-based characteristics such as charge to polypeptide-based characteristics such as type of secondary structure. Our analysis of sites uses non-sites as controls, providing a baseline for the quantitative assessment of the significance of the features that are uncovered. In this paper, we use radial distributions of properties to study three well-known sites (the binding sites for calcium, the milieu of disulfide bridges, and the serine protease active site).
We demonstrate that the system automatically finds many of the previously described features of these sites and augments these features with some new details. In some cases, we cannot confirm the statistical significance of previously reported features. Our results demonstrate that analysis of protein structure is sensitive to assumptions about background distributions, and that these distributions should be considered explicitly during structural analyses.''",
}

@Article{Bagley95b,
  author = "S.C. Bagley and L. Wei and C. Cheng and R.B. Altman",
  title = "Characterizing oriented protein structural sites using biochemical properties",
  journal = "Proc Int Conf Intell Syst Mol Biol",
  year = "1995",
  volume = "3",
  pages = "12-20",
  comment = "``A protein site is a region of a three-dimensional protein structure with a distinguishing functional or structural role. Certain sites recur in different protein structures (for example catalytic sites, calcium binding sites, and some types of turns), but maintain critical shared features. To facilitate the analysis of such protein sites, we have developed a computer system for analyzing the spatial distributions of biochemical properties around a site. The system takes a set of similar sites and a set of control nonsites, and finds differences between them. Specifically, it compares distributions of the properties surrounding the sites with those surrounding the nonsites, and reports statistically significant differences. In this paper, we use our method to analyze the features in the active site of the serine protease enzymes. We compare the use of radial distributions (shells) with 3-D grids (blocks) in the analysis of the active site. We demonstrate three different strategies for focusing attention on significant findings, based on properties of interest, spatial volumes of interest, and on the level of statistical significance.
Finally, we show that the program automatically identifies conserved sequential, secondary structural and biophysical features of the serine protease active site, using noncatalytic histidine residues as a control environment.''",
}

@Article{Bagley96,
  author = "S.C. Bagley and R.B. Altman",
  title = "Conserved features in the active site of nonhomologous serine proteases",
  journal = "Fold Des",
  year = "1996",
  volume = "1",
  number = "5",
  pages = "371-379",
  comment = "``BACKGROUND: Serine protease activity is critical for many biological processes and has arisen independently in a few different protein families. It is not clear, though, the degree to which these protease families share common biochemical and biophysical properties. We have used a computer program to study the properties that are shared by four serine protease active sites with no overall structural or sequence homology. The program systematically compares the region around the catalytic histidines from the four proteins with a set of noncatalytic histidines, used as controls. It reports the three-dimensional locations and level of statistical significance for those properties that distinguish the catalytic histidines from the noncatalytic ones. The method of analysis is general and can be applied easily to other active sites of interest. RESULTS: As expected, some of the reported properties correspond to previously known features of the serine protease active site, including the catalytic triad and the oxyanion hole. Novel properties are also found, including the spatial distribution of charged, polar, and hydrophobic groups arranged to stabilize the catalytic residues, and a relative abundance of some residues (Val, Tyr, Leu, and Gly) around the active site.
CONCLUSIONS: Our findings show that in addition to some properties common to all the proteases examined, there are a set of preferred, but not required, properties that can be reliably observed only by aligning the sites and comparing them with carefully selected statistical controls.''",
}

@Article{Banatao03,
  author = "D.R. Banatao and R.B. Altman and T.E. Klein",
  title = "Microenvironment analysis and identification of magnesium binding sites in {RNA}",
  journal = "Nucleic Acids Res",
  year = "2003",
  volume = "31",
  number = "15",
  pages = "4450-4460",
  comment = "Used the FEATURE algorithm to determine ``novel physicochemical descriptions of site-bound and diffusely bound Mg2+ ions in RNA that are useful for prediction. Electrostatic calculations using the Non-Linear Poisson Boltzmann (NLPB) equation provided further evidence for the locations of site-bound ions. We confirmed the locations of experimentally determined sites and further differentiated between classes of ion binding. We also identified potentially important, high scoring sites in the group I intron that are not currently annotated as Mg2+ binding sites.''",
}

@Article{Wei03,
  author = "L. Wei and R.B. Altman",
  title = "Recognizing Complex, Asymmetric functional sites in protein structures using a {Bayesian} scoring function",
  journal = "Journal of Bioinformatics and Computational Biology",
  volume = "1",
  number = "1",
  year = "2003",
  pages = "119-138",
}

@Article{Liang03a,
  author = "M.P. Liang and D.R. Banatao and T.E. Klein and D.L. Brutlag and R.B. Altman",
  title = "{WebFEATURE:} An interactive web tool for identifying and visualizing functional sites on macromolecular structures",
  journal = "Nucleic Acids Res",
  year = "2003",
  volume = "31",
  number = "13",
  pages = "3324-3327",
  comment = "``WebFEATURE (http://feature.stanford.edu/webfeature/) is a web-accessible structural analysis tool that allows users to scan query structures for functional sites in both proteins and nucleic acids.
WebFEATURE is the public interface to the scanning algorithm of the FEATURE package, a supervised learning algorithm for creating and identifying 3D, physicochemical motifs in molecular structures. Given an input structure or Protein Data Bank identifier (PDB ID), and a statistical model of a functional site, WebFEATURE will return rank-scored ``hits'' in 3D space that identify regions in the structure where similar distributions of physicochemical properties occur relative to the site model. Users can visualize and interactively manipulate scored hits and the query structure in web browsers that support the Chime plug-in. Alternatively, results can be downloaded and visualized through other freely available molecular modeling tools, like RasMol, PyMOL and Chimera. A major application of WebFEATURE is in rapid annotation of function to structures in the context of structural genomics.''",
}

@InProceedings{Banatao01,
  author = "D.R. Banatao and C.C. Huang and P.C. Babbitt and R.B. Altman and T.E. Klein",
  title = "{ViewFeature}: integrated feature analysis and visualization",
  booktitle = "Pac Symp Biocomput",
  year = "2001",
  pages = "240-250",
  comment = "``We have developed an extension to the molecular visualization program Chimera that integrates Feature's statistical models and site predictions with 3-dimensional structures viewed in Chimera. We call this extension ViewFeature, and it is designed to help users understand the structural Features that define a site of interest. We applied ViewFeature in an analysis of the enolase superfamily; a functionally distinct class of proteins that share a common fold, the alpha/beta barrel, in order to gain a more complete understanding of the conserved physical properties of this superfamily. In particular, we wanted to define the structural determinants that distinguish the enolase superfamily active site scaffold from other alpha/beta barrel superfamilies and particularly from other metal-binding alpha/beta barrel proteins.
Through the use of ViewFeature, we have found that the C-terminal domain of the enolase superfamily does not differ at the scaffold level from metal-binding alpha/beta barrels. We are, however, able to differentiate between the metal-binding sites of alpha/beta barrels and those of other metal-binding proteins. We describe the overall architectural Features of enolases in a radius of 10 Angstroms around the active site.''",
}

@InProceedings{Liang03b,
  author = "M.P. Liang and D.L. Brutlag and R.B. Altman",
  title = "Automated construction of structural motifs for predicting functional sites on protein structures",
  booktitle = "Pac Symp Biocomput",
  year = "2003",
  pages = "204-215",
  comment = "``We describe a method to predict functional sites by automatically creating three dimensional structural motifs from amino acid sequence motifs. These structural motifs perform comparably well with manually generated structural motifs and perform better than sequence motifs.''",
}

@InProceedings{Waugh01,
  author = "A. Waugh and G.A. Williams and L. Wei and R.B. Altman",
  title = "Using meta computing tools to facilitate large-scale analyses of biological databases",
  booktitle = "Pac Symp Biocomput",
  year = "2001",
  pages = "360-371",
  comment = "``We use a distributed computing environment, Legion, to enable large-scale computations on the Protein Data Bank (PDB). In particular, we employ the Feature program to scan all protein structures in the PDB in search for unrecognized potential cation binding sites. We evaluate the efficiency of Legion's parallel execution capabilities and analyze the initial biological implications that result from having a site annotation scan of the entire PDB. We discuss four interesting proteins with unannotated, high-scoring candidate cation binding sites.''",
}

@InProceedings{Wei98,
  author = "L. Wei and R.B.
Altman",
  title = "Recognizing protein binding sites using statistical descriptions of their {3D} environments",
  booktitle = "Pac Symp Biocomput",
  year = "1998",
  pages = "497-508",
  comment = "This is the main reference for matching of binding sites with Feature. ``We have developed a new method for recognizing sites in three-dimensional protein structures. Our method is based on our previously reported algorithm for creating descriptions of protein microenvironments using physical and chemical properties at multiple levels of detail (including features at the atomic, chemical group, residue, and secondary structural levels). The recognition method takes three inputs: a set of sites that share some structural or functional role, a set of control nonsites that lack this role, and a single query site. The values of properties for the query site are compared to the distributions of values for both sites and nonsites to determine the group to which it is most similar. A log-odds scoring function, based on Bayes' Rule, computes a score that indicates the likelihood that the query region is a site of interest. In this paper, we apply the method to the task of identifying calcium binding sites in proteins. Cross-validation analysis shows that this recognition approach has high sensitivity and specificity. We also describe the results of scanning four calcium binding proteins (with the calcium removed) using a three-dimensional grid of probe points at 2 A spacing. The probe points that have high scores cluster around the true calcium binding sites, with the highest scoring points at or near the binding sites. The method fails in only one case where a calcium binding site is created by four proteins in the crystal lattice, and is thus not recognizable within the crystallographic asymmetric unit. Our results show that property-based descriptions can be used for recognizing protein sites in unannotated structures.''",
}

@InProceedings{Wei97,
  author = "L. Wei and R.B.
Altman and J.T. Chang", title = "Using the radial distributions of physical features to compare amino acid environments and align amino acid sequences", booktitle = "Pac Symp Biocomput", year = "1997", pages = "465--476", comment = "``We have performed a comprehensive analysis of the microenvironments surrounding the twenty amino acids. Our analysis includes comparison of amino acid environments with random control environments as well as with each of the other amino acid environments. We describe the amino acid environments with a set of 21 features summarizing atomic, chemical group, residue, and secondary structural features. The environments are divided into radial shells of 1 A thickness to represent the distance of the features from the amino acid C beta atoms. We make the results of our analysis available graphically over the world wide web. To illustrate the validity and utility of our analysis, we used the amino acid comparative profiles to construct a substitution matrix, the WAC matrix, based on a simple summary of the computed environmental differences. We compared our matrix to BLOSUM62 and PAM250 in BLAST searches with query sequences selected from 39 protein families found in the PROSITE database. Although BLOSUM62 was the most sensitive matrix overall, our matrix was more sensitive for some families, and exhibited overall performance similar to PAM250. Our results suggest that the radial distribution of biochemical and biophysical features is useful for comparing amino acid environments, and that similarity matrices based on the geometric distribution of features around amino acids may produce improved search sensitivity.''", }
@Article{Yoon07, title = "Clustering protein environments for function prediction: finding {PROSITE} motifs in {3D}", author = "S. Yoon and J.C. Ebert and E-Y. Chung and G. {De Micheli} and R.B.
Altman", journal = "{BMC} Bioinformatics", volume = "8", number = "Suppl 4", year = "2007", pages = "S10", url = "http://infoscience.epfl.ch/getfile.py?recid=99253&mode=best", }
@Article{Ota03, author = "M. Ota and K. Kinoshita and K. Nishikawa", title = "Prediction of Catalytic Residues in Enzymes Based on Known Tertiary Structure, Stability Profile, and Sequence Conservation", journal = "Journal of Molecular Biology", volume = "327", number = "5", year = "2003", pages = "1053--1064", abstract = "The catalytic or functionally important residues of a protein are known to exist in evolutionarily constrained regions. However, the patterns of residue conservation alone are sometimes not very informative, depending on the homologous sequences available for a given query protein. Here, we present an integrated method to locate the catalytic residues in an enzyme from its sequence and structure. Mutations of functional residues usually decrease the activity, but concurrently often increase stability. Also, catalytic residues tend to occupy partially buried sites in holes or clefts on the molecular surface. After confirming these general tendencies by carrying out statistical analyses on 49 representative enzymes, these data together with amino acid conservation were evaluated. This novel method exhibited better sensitivity in the prediction accuracy than traditional methods that consider only the residue conservation. We applied it to some so-called ''hypothetical'' proteins, with known structures but undefined functions. The relationships among the catalytic, conserved, and destabilizing residues in enzymatic proteins are discussed.", }
@Article{Kinoshita05, author = "K. Kinoshita and M.
Ota", title = "{P-cats:} prediction of catalytic residues in proteins from their tertiary structures", journal = "Bioinformatics", volume = "21", number = "17", year = "2005", pages = "3570--3571", abstract = "Summary: P-cats is a web server that predicts the catalytic residues in proteins from the atomic coordinates. P-cats receives a coordinate file of the tertiary structure and sends out analytical results via e-mail. The reply contains a summary and two URLs to allow the user to examine the conserved residues: one for interactive images of the prediction results and the other for a graphical view of the multiple sequence alignment. Availability: P-cats is freely available at http://p-cats.hgc.jp/p-cats. Contact: kino@ims.u-tokyo.ac.jp", }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with pseudo-atoms and matching with association graphs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@Article{Artymiuk94, author = "P.J. Artymiuk and A.R. Poirrette and H.M. Grindley and D.W. Rice and P. Willett", title = "A Graph-Theoretic Approach to the Identification of {3-Dimensional} Patterns of Amino-Acid Side-Chains in Protein Structures", journal = "Journal of Molecular Biology", volume = "243", year = "1994", pages = "327--344", comment = "``This paper discusses the use of graph-theoretic methods for the representation and searching of three-dimensional patterns of side-chains in protein structures. The position of a side-chain is represented by pseudo-atoms, and the relative positions of pairs of side-chains by the distances between them. This description of the geometry can be represented by a labelled graph in which the nodes and the edges of the graph represent the pseudo-atoms and the sets of inter-pseudo-atomic distances, respectively.
Given such a representation, a protein can be searched for the presence of a user-defined query pattern of side-chains by means of a subgraph-isomorphism algorithm which is implemented in the program ASSAM.''", }
@Article{Schmitt02, author = "S. Schmitt and D. Kuhn and G. Klebe", title = "A new method to detect related function among proteins independent of sequence and fold homology", journal = "J Mol Biol", year = "2002", volume = "323", pages = "387--406", comment = "This is the main reference for pseudo-centers. Also, describes Cavbase. Finds maximal clique in association graph to match sets of pseudo-centers.", }
@Article{Weskamp04, author = "N. Weskamp and D. Kuhn and E. H{\"u}llermeier and G. Klebe", title = "Efficient similarity search in protein structure databases by k-clique hashing", journal = "Bioinformatics", volume = "20", year = "2004", pages = "1522--1526", comment = "Describes search of Cavbase (sites represented by pseudo-centers) combining clique detection in association graphs with geometric hashing.", }
@InProceedings{Weskamp03, author = "N. Weskamp and D. Kuhn and E. H{\"u}llermeier and G. Klebe", title = "Efficient Similarity Search in Protein Structure Databases: Improving Clique-Detection through Clique Hashing", booktitle = "German Conference on Bioinformatics", location = "Munich, Germany", month = oct, year = "2003", abstract = "In order to make the structural comparison of protein binding sites more efficient, we propose a two-step method that combines advantages from both graph-based clique-detection and geometric hashing. The search for protein similarity is completely independent of sequence and fold information. Instead, it is based on a recent approach for the automatic extraction of binding sites from protein structures and the representation of their geometric and physicochemical properties. We also present some empirical results for similarity search in a medium-sized dataset.", }
@InProceedings{Kupas04, author = "K. Kupas and A.
Ultsch and G. Klebe", title = "An algorithm for finding similarities in protein active sites", booktitle = "ICBA", location = "Fort Lauderdale, FL", year = "2004", comment = "This paper uses pseudo-centers to compare binding sites. ``The binding-site exposed physicochemical characteristics are described by assigning generic pseudocenters to the functional groups of the amino acids flanking a particular active site. These pseudocenters are assembled into small substructures. To find substructures with spatial similarity and appropriate chemical properties, an emergent self-organizing map is used for clustering. Two substructures which are found to be similar form the basis for an expanded comparison of the complete cavities. Preliminary results with four pairs of binding cavities show that similarities are detected correctly and motivate further studies.''", }
@Article{Spriggs03, author = "R.V. Spriggs and P.J. Artymiuk and P. Willett", title = "Searching for patterns of amino acids in {3D} protein structures", journal = "J Chem Inf Comput Sci", volume = "43", year = "2003", pages = "412--421", comment = "Uses pseudo-centers and distance subgraph isomorphism. ASSAM represents an amino acid by a vector drawn from the main chain towards the functional part of the amino acid and then computes a graph representation of a protein in which the individual side-chain vectors are the nodes and the intervector distances are the edges. The presence of a query pattern in a Protein Data Bank structure can then be searched for by means of a subgraph isomorphism algorithm.", }
@Article{Brakoulias04, author = "A. Brakoulias and R.M.
Jackson", title = "Towards a structural classification of phosphate binding sites in protein-nucleotide complexes: an automated all-against-all structural comparison using geometric matching", journal = "Proteins", volume = "56", number = "2", year = "2004", pages = "250--260", abstract = "A method is described for the rapid comparison of protein binding sites using geometric matching to detect similar three-dimensional structure. The geometric matching detects common atomic features through identification of the maximum common sub-graph or clique. These features are not necessarily evident from sequence or from global structural similarity giving additional insight into molecular recognition not evident from current sequence or structural classification schemes. Here we use the method to produce an all-against-all comparison of phosphate binding sites in a number of different nucleotide phosphate-binding proteins. The similarity search is combined with clustering of similar sites to allow a preliminary structural classification. Clustering by site similarity produces a classification of binding sites for the 476 representative local environments producing ten main clusters representing half of the representative environments. The similarities make sense in terms of both structural and functional classification schemes. The ten main clusters represent a very limited number of unique structural binding motifs for phosphate. These are the structural P-loop, di-nucleotide binding motif [FAD/NAD(P)-binding and Rossmann-like fold] and FAD-binding motif. Similar classification schemes for nucleotide binding proteins have also been arrived at independently by others using different methods.", }
@Article{Pickering01, author = "S.J. Pickering and A.J. Bulpitt and N. Efford and N.D. Gold and D.R.
Westhead", title = "{AI}-based algorithms for protein surface comparisons", journal = "Comput Chem", volume = "26", year = "2001", pages = "79--84", comment = "This paper uses surface points and association graphs to match ligand binding sites.", }
@Article{Milik03, author = "M. Milik and S. Szalma and K.A. Olszewski", title = "Common Structural Cliques: a tool for protein structure and function analysis", journal = "Protein Eng", volume = "16", year = "2003", pages = "543--552", comment = "``The compared protein structures are condensed to a graph representation, with atoms as nodes and distances as edge labels. Protein graphs are then compared to extract all possible Common Structural Cliques. These cliques are merged to create Structural Templates: graphs that describe structural analogies between compared proteins. Structures of serine endopeptidases were compared in pairs using the presented algorithm with different geometrical parameters.''", }
@Article{Wangikar03, author = "P.P. Wangikar and A.V. Tendulkar and S. Ramya and D.N. Mali and S. Sarawagi", title = "Functional sites in protein families uncovered via an objective and automated graph theoretic approach", journal = "Journal of Molecular Biology", volume = "326", year = "2003", pages = "955--978", comment = "``We report a method for detection of recurring side-chain patterns (DRESPAT) using an unbiased and automated graph theoretic approach. We first list all structural patterns as sub-graphs where the protein is represented as a graph. The patterns from proteins are compared pair-wise to detect patterns common to a protein pair based on content and geometry criteria. The recurring pattern is then detected using an automated search algorithm from the all-against-all pair-wise comparison data of proteins. Intra-protein pattern comparison data are used to enable detection of patterns recurring within a protein.
A method has been proposed for empirical calculation of statistical significance of recurring pattern. The method was tested on 17 protein sets of varying size, composed of non-redundant representatives from SCOP superfamilies.''", }
@Article{Jambon03, author = "M. Jambon and A. Imberty and G. Deleage and C. Geourjon", title = "A new bioinformatic approach to detect common {3D} sites in protein structures", journal = "Proteins", volume = "52", year = "2003", pages = "137--145", comment = "``The basis for this method is a representation of the protein structure by a set of stereochemical groups that are defined independently from the notion of amino acid. An efficient heuristic for finding similarities that uses graphs of triangles of chemical groups to represent the protein structures has been developed.''", }
@Article{Barrow76, author = "H.G. Barrow and R.M. Burstall", title = "Subgraph isomorphism, matching relational structures and maximal cliques", journal = "Inf. Process. Lett.", year = "1976", volume = "4", pages = "83--84", comment = "This is the classical paper about using association graphs to match rigid point sets", }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with pseudo-atoms and matching with geometric hashing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@Article{Shulman-Peleg04, author = "A. Shulman-Peleg and R. Nussinov and H.J. Wolfson", title = "Recognition of functional sites in protein structures", journal = "J Mol Biol", year = "2004", volume = "339", pages = "607--633", comment = "Watson05: SiteEngine uses modified pseudo-centres and geometric hashing to compare surfaces with the aim of identifying conserved chemistry in similar pockets, which might indicate similar function.
``We achieve high efficiency and speed by introducing a low-resolution surface representation via chemically important surface points, by hashing triangles of physico-chemical properties and by application of hierarchical scoring schemes for a thorough exploration of global and local similarities. We proceed to rigorously apply this method to functional site recognition in three possible ways: first, we search a given functional site on a large set of complete protein structures. Second, a potential functional site on a protein of interest is compared with known binding sites, to recognize similar features. Third, a complete protein structure is searched for the presence of an a priori unknown functional site, similar to known sites.''", }
@Article{Pennec98, author = "X. Pennec and N. Ayache", title = "A geometric algorithm to find small but highly similar {3D} substructures in proteins", journal = "Bioinformatics", volume = "14", year = "1998", pages = "516--522", comment = "``We propose a new 3D substructure matching algorithm based on geometric hashing techniques. The key feature of the method is the introduction of a 3D reference frame attached to each residue.''", }
@Article{Wolfson97, author = "H.J. Wolfson and I. Rigoutsos", title = "Geometric hashing: an overview", journal = "IEEE Computational Science \& Engineering", volume = "4", number = "4", year = "1997", pages = "10--21", }
@InProceedings{Lamdan88, author = "Y. Lamdan and H. Wolfson", title = "Geometric hashing: a general and efficient recognition scheme", booktitle = "2nd International Conference on Computer Vision", pages = "238--251", location = "Tarpon Springs, FL", year = "1988", comment = "This is the main reference for geometric hashing", }
@Article{Norel93, author = "D. Fischer and R. Norel and H. Wolfson and R.
Nussinov", title = "Surface motifs by a computer vision technique: Searches, detection, and implications for protein-ligand recognition", journal = "Proteins: Structure, Function, and Genetics", volume = "16", number = "3", year = "1993", pages = "278--292", abstract = "We describe the application of a method geared toward structural and surface comparison of proteins. The method is based on the Geometric Hashing Paradigm adapted from Computer Vision. It allows for comparison of any two sets of 3-D coordinates, such as protein backbones, protein core or protein surface motifs, and small molecules such as drugs. Here we apply our method to 4 types of comparisons between pairs of molecules: (1) comparison of the backbones of two protein domains; (2) search for a predefined 3-D C$\alpha$ motif within the full backbone of a domain; and in particular, (3) comparison of the surfaces of two receptor proteins; and (4) comparison of the surface of a receptor to the surface of a ligand. These aspects complement each other and can contribute toward a better understanding of protein structure and biomolecular recognition. Searches for 3-D surface motifs can be carried out on either receptors or on ligands. The latter may result in the detection of pharmacophoric patterns. If the surfaces of the binding sites of either the receptors or of the ligands are relatively similar, surface superpositioning may aid significantly in the docking problem. Currently, only distance invariants are used in the matching, although additional geometric surface invariants are considered. The speed of our Geometric Hashing algorithm is encouraging, with a typical surface comparison taking only seconds or minutes of CPU time on a SUN 4 SPARC workstation. The direct application of this method to the docking problem is also discussed.
We demonstrate the success of this method in its application to two members of the globin family and to two dehydrogenases.", }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with atoms/residues and matching with combinatorial extension
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@Article{Ferre04, author = "F. Ferre and G. Ausiello and A. Zanzoni and M. Helmer-Citterich", title = "{SURFACE:} a database of protein surface regions for functional annotation", journal = "Nucleic Acids Res", year = "2004", volume = "32", pages = "D240--D244", comment = "Describes a database of binding sites, each represented by a set of points (two per residue - CA and center of side chain). Matching is performed by a combinatorial expansion algorithm. The database is available at http://cbm.bio.uniroma2.it/surface/.", }
@Article{Ivanisenko04, author = "V.A. Ivanisenko and S.S. Pintus and D.A. Grigorovich and N.A. Kolchanov", title = "{PDBSiteScan:} a program for searching for active, binding and posttranslational modification sites in the {3D} structures of proteins", journal = "Nucleic Acids Res", year = "2004", volume = "32", pages = "W549--W554", comment = "Alignment of point sets with combinatorial extension (like CE).", }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with atoms/residues and matching with ???
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@Article{Kobayashi97, author = "N. Kobayashi and N. Go", title = "A method to search for similar protein local structures at ligand binding sites and its application to adenine recognition", journal = "Eur Biophys J", volume = "26", year = "1997", pages = "135--144", comment = "Utilizes bound ligand to define region of interest and align.
``We have developed a method of searching for similar spatial arrangements of atoms around a given chemical moiety in proteins that bind a common ligand. The first step in this method is to consider a set of atoms that closely surround a given chemical moiety. Then, to compare the spatial arrangements of such surrounding atoms in different proteins, they are translated and rotated so that the chemical moieties are superposed on each other. Spatial arrangements of surrounding atoms in a pair of proteins are judged to be similar, when there are many corresponding atoms occupying similar spatial positions.''", }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with templates
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@Article{Jones04, author = "S. Jones and J.M. Thornton", title = "Searching for functional sites in protein structures", journal = "Curr Opin Chem Biol", year = "2004", volume = "8", pages = "3--7", comment = "Contains brief overview of template-based methods", }
@Article{Torrance05, author = "J.W. Torrance and G.J. Bartlett and C.T. Porter and J.M. Thornton", title = "Using a library of structural templates to recognise catalytic sites and explore their evolution in homologous families", journal = "J Mol Biol", year = "2005", volume = "347", pages = "565--581", comment = "Watson05: The authors present a library of catalytic site structural templates based on information from the scientific literature. In an extension of previous work, a new web server is released that allows users to search the CSA using the JESS algorithm. The user can investigate a specific PDB code or submit a three-dimensional protein structure for analysis. (http://www.ebi.ac.uk/thornton-srv/databases/CSS).", }
@Article{Porter04, author = "C.T. Porter and G.J. Bartlett and J.M.
Thornton", title = "The {C}atalytic {S}ite {A}tlas: a resource of catalytic sites and residues identified in enzymes using structural data", journal = "Nucleic Acids Res", volume = "32", year = "2004", pages = "D129--D133", comment = "This is the main reference for the Catalytic Site Atlas (CSA) (http://www.ebi.ac.uk/thornton-srv/databases/CSA/index.html), a database of catalytic sites as determined by scanning the literature. A case is made that the SITE records of PDB files are used in an inconsistent fashion. So, the authors have gone through the literature and identified the catalytic residues for 177 proteins (as of the time of writing). They have also transferred annotations to 2608 proteins homologous to the originals. The paper does not demonstrate applications enabled by the database.", }
@Article{Stark03a, author = "A. Stark and R.B. Russell", title = "Annotation in three dimensions {PINTS:} Patterns in Non-homologous Tertiary Structures", journal = "Nucleic Acids Res", volume = "31", year = "2003", pages = "3341--3344", comment = "This is the main reference for PINTS.", }
@Article{Stark03b, author = "A. Stark and S. Sunyaev and R.B. Russell", title = "A model for statistical significance of local similarities in structure", journal = "J Mol Biol", volume = "326", year = "2003", pages = "1307--1316", comment = "This provides analysis for the statistical significance of matches based on the RMSD between atom pairs. It is applied to evaluate matches for protein alignments.", }
@Article{Stark04, author = "A. Stark and A. Shkumatov and R.B. Russell", title = "Finding functional sites in structural genomics proteins", journal = "Structure", year = "2004", volume = "12", pages = "1405--1412", comment = "Watson05: The authors report the use of fold similarity and template methods to identify functional sites in a selection of proteins solved by structural genomics projects.
The authors compare their method (PINTS) with two other template-based methods, PROCAT and RIGOR", }
@Article{Pazos04, author = "F. Pazos and M.J.E. Sternberg", title = "Automated prediction of protein function and detection of functional sites from structure", journal = "Proc Natl Acad Sci USA", year = "2004", volume = "101", pages = "14754--14759", comment = "Watson05: The authors describe Phunctioner, a method for the automatic prediction of function. An initial structural alignment is split into functionally specific subalignments using GO annotation. The conserved residues in each subalignment are interpreted as functionally important residues and are used to construct PSSMs for scanning against a query sequence to find the best-fitting functional match. An additional benefit is that the method can identify functionally important residues for GO terms for which no such information is currently known.", }
@Article{Laskowski05a, author = "R.A. Laskowski and J.D. Watson and J.M. Thornton", title = "Protein function prediction using local {3D} templates", journal = "J. Mol. Biol.", volume = "351", year = "2005", pages = "614--626", }
@Article{Preissner98, author = "R. Preissner and A. Goede and C. Frommel", title = "Dictionary of interfaces in proteins ({DIP}) Data bank of complementary molecular surface patches", journal = "J Mol Biol", volume = "280", year = "1998", pages = "535--550", comment = "``We defined interfaces as pairs of matching molecular surface patches between neighboring secondary structural elements. All such interfaces from known protein structures were collected in a comprehensive data bank of interfaces in proteins (DIP). The up-to-date DIP contains interface files for 351 selected Brookhaven Protein Data Bank entries with a total of about 160,000 surface elements formed by 12,475 secondary structures. ...
The existing retrieval system for the DIP allows selection (out of the set of molecular patches) according to different criteria, such as geometric features, atomic composition, type of secondary structure, contacts, etc. A fast, sequence-independent 3-D superposition procedure was developed for automatic searches for geometrically similar surface areas. Using this procedure, we found a large number of structurally similar interfaces of up to 30 atoms in completely unrelated protein structures.''", }
@Article{Frommel03, author = "C. Frommel and C. Gille and A. Goede and C. Gropl and S. Hougardy and T. Nierhoff and R. Preissner and M. Thimm", title = "Accelerating screening of {3D} protein data with a graph theoretical approach", journal = "Bioinformatics", volume = "19", year = "2003", pages = "2442--2447", comment = "``The Dictionary of Interfaces in Proteins (DIP) is a database collecting the 3D structure of interacting parts of proteins that are called patches. It serves as a repository, in which patches similar to given query patches can be found. In this work we address the question of how the patches similar to a given query can be identified by scanning only a small part of DIP. The answer to this question requires the investigation of the distribution of the similarity of patches.''", }
@Article{Kleywegt99, author = "G.J. Kleywegt", title = "Recognition of spatial motifs in protein structures", journal = "J Mol Biol", volume = "285", year = "1999", pages = "1887--1897", comment = "This paper describes two programs: SPASM and RIGOR. SPASM matches a single structural motif (spatial arrangement of points) to a database of proteins, while RIGOR matches a single protein to many structural motifs. Each residue is represented by its CA atom and/or the centroid of its side chain.
Possible point correspondences are enumerated exhaustively, considering points for correspondence when their residue types are within some threshold in a substitution matrix. Constraints may also be added that matching residues be in the same order in the sequence, separated by the same size gaps in the sequences, etc. For every possible set of correspondences, the point sets are superposed, and the RMSD is checked to see if it is below a threshold. Structural motifs are constructed in three ways: 1) manually, 2) all sets of residues in spatial proximity that contain only hydrophobic, only polar and charged, or mixed hydrophobic and polar/charged residues, and 3) sets of residues that all contact a single hetero-compound. Applications are shown for a few cases of main-chain recognition, active-site recognition, and metal-binding site recognition. (http://alpha2.bmc.uu.se/usf).", }
@Article{Singh03, author = "R. Singh and M. Saha", title = "Identifying structural motifs in proteins", journal = "Pacific Symposium on Biocomputing", volume = "8", year = "2003", pages = "228--239", }
@Article{Masden02, author = "D. Madsen and G.J. Kleywegt", title = "Interactive motif and fold recognition in protein structures", journal = "J. Appl. Cryst.", volume = "35", year = "2002", pages = "137--139", }
@Article{Dawe03, author = "J.H. Dawe and C.T. Porter and J.M. Thornton and A.B. Tabor", title = "A template search reveals mechanistic similarities and differences in {$\beta$}-ketoacyl synthases ({KAS}) and related enzymes", journal = "Proteins", volume = "52", year = "2003", pages = "427--435", }
% The key Hamelryck03 is defined once, later in this section, with issue number and abstract;
% a second, sparser definition that used to sit here caused a repeated-entry error.
@Article{Russell98, author = "R.B.
Russell", title = "Detection of protein three-dimensional side-chain patterns: new examples of convergent evolution", journal = "J Mol Biol", volume = "279", year = "1998", pages = "1211--1227", }
@Article{Barker03, author = "J.A. Barker and J.M. Thornton", title = "An algorithm for constraint-based structural template matching: application to {3D} templates with statistical analysis", journal = "Bioinformatics", volume = "19", year = "2003", pages = "1644--1649", comment = "This is the main reference for JESS, an improved version of TESS. It includes an empirical measure of statistical significance for every match.", }
@Article{Wallace97, author = "A.C. Wallace and N. Borkakoti and J.M. Thornton", title = "{TESS}: A geometric hashing algorithm for deriving {3D} coordinate templates for searching structural databases. Application to enzyme active sites", year = "1997", journal = "Protein Science", volume = "6", number = 11, pages = "2308--2323", comment = "This is the main reference for TESS, the most commonly cited template method for representing binding sites. Templates are spatial arrangements of attributed points typical of a particular binding site. In this paper, they seem to be constructed manually with some knowledge of the binding specificity of particular enzyme classes. The paper describes a geometric hashing strategy for searching a protein for matches to a given template - where a coordinate frame is considered for each residue (rather than for every possible triple) with axes chosen in a manner specific to every amino acid. The method is shown for proteins having HIS-based catalytic triads, ribonucleases, and lysozymes.", }
@Article{Wallace96, author = "A.C. Wallace and R.A. Laskowski and J.M.
Thornton", title = "Derivation of {3D} coordinate templates for searching structural databases: Application to {Ser-His-Asp} catalytic triads in the serine proteinases and lipases", journal = "Protein Science", volume = "5", year = "1996", pages = "1001--1013", comment = "This paper is a precursor to Wallace97.", }
@Article{Jonassen02, author = "I. Jonassen and I. Eidhammer and D. Conklin and W.R. Taylor", title = "Structure motif discovery and mining the {PDB}", journal = "Bioinformatics", volume = "18", year = "2002", pages = "362--367", }
@Article{Jonassen99, author = "I. Jonassen and I. Eidhammer and W.R. Taylor", title = "Discovery of local packing motifs in protein structures", journal = "Proteins", volume = "34", year = "1999", pages = "206--219", }
@Article{Bradley02, author = "P. Bradley and P.S. Kim and B. Berger", title = "{TRILOGY:} Discovery of sequence-structure patterns across diverse proteins", journal = "Proc Natl Acad Sci U S A", volume = "99", year = "2002", pages = "8500--8505", }
@Article{Oldfield02, author = "T.J. Oldfield", title = "Data mining the protein data bank residue interactions", journal = "Proteins", year = "2002", volume = "49", pages = "510--528", }
@Misc{Binkowski03a, author = "T.A. Binkowski and P. Freeman and J. Liang", title = "{pvSoar}", howpublished = {http://pvsoar.bioengr.uic.edu}, year = "2003", }
@Article{Binkowski03c, author = "T.A. Binkowski and L. Adamian and J. Liang", title = "Inferring functional relationships of proteins from local sequence and spatial surface patterns", journal = "J Mol Biol", volume = "332", year = "2003", pages = "505--526", }
@Article{Binkowski04, author = "T.A. Binkowski and P. Freeman and J.
Liang",
  title = "{pvSOAR}: detecting similar surface patterns of pocket and void surfaces of amino acid residues on proteins",
  journal = "Nucleic Acids Res",
  year = "2004",
  volume = "32",
  pages = "W555--W558",
  comment = "Watson05: The pvSOAR web server identifies similar surface regions among proteins, and takes advantage of the CASTp database of pockets and cavities. The authors describe how the server can be used to predict the function of hypothetical proteins, illustrating this with the E coli BioH protein (PDB code 1m33) as an example",
}

@Article{Hamelryck03,
  author = "T. Hamelryck",
  title = "Efficient identification of side-chain patterns using a multidimensional index tree",
  journal = "Proteins",
  volume = "51",
  number = "1",
  year = "2003",
  pages = "96--108",
  abstract = "Convergent evolution often produces similar functional sites in nonhomologous proteins. The identification of these sites can make it possible to infer function from structure, to pinpoint the location of a functional site, to identify enzymes with similar enzymatic mechanisms, or to discover putative functional sites. In this article, a novel method is presented that (a) queries a database of protein structures for the occurrence of a given side chain pattern and (b) identifies interesting side-chain patterns in a given structure. For efficiency and to make a robust statistical evaluation of the significance of a similarity possible, patterns of three residues (or triads) are considered. Each triad is encoded as a high-dimensional vector and stored in an SR (Sphere/Rectangle) tree, an efficient multidimensional index tree. Identifying similar triads can then be reformulated as identifying neighboring vectors. The method deals with many features that otherwise complicate the identification of meaningful patterns: shifted backbone positions, conservative substitutions, various atom label ambiguities and mirror imaged geometries.
The combined treatment of these features leads to the identification of previously unidentified patterns. In particular, the identification of mirror imaged side-chain patterns is unique to the here-described method. Interesting triads in a given structure can be identified by extracting all triads and comparing them with a database of triads involved in ligand binding. The approach was tested by an all-against-all comparison of unique representatives of all SCOP superfamilies. New findings include mirror imaged metal binding and active sites, and a putative active site in bacterial luciferase.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with surfaces and matching with association graphs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Hofbauer04,
  author = "C. Hofbauer and H. Lohninger and A. Aszodi",
  title = "{SURFCOMP:} A Novel Graph-Based Approach to Molecular Surface Comparison",
  journal = "J. Chem. Inf. Comput. Sci.",
  year = "2004",
  volume = "44",
  pages = "837--847",
  comment = "This paper presents ``an approach that uses maximal common subgraph comparison and harmonic shape image matching to detect locally similar regions between two molecular surfaces augmented with properties such as the electrostatic potential or lipophilicity.'' The complexity of the problem is reduced by a set of filters that eliminate potential correspondences between vertices with different intramolecular distances, different electrostatic potentials, different lipophilic potentials, different principal curvatures on the Connolly surfaces, different harmonic shape images for their neighborhoods on the Connolly surfaces, and different orientations of the aligned harmonic shape images with respect to the line segment between any pair of points.
``The approach was tested on dihydrofolate reductase and thermolysin inhibitors and was shown to recover the correct alignments of the compounds bound in the active sites.''",
}

@Article{Kinoshita05,
  author = "K. Kinoshita and H. Nakamura",
  title = "Identification of the ligand binding sites on the molecular surface of proteins",
  journal = "Protein Science",
  volume = "14",
  year = "2005",
  pages = "711--718",
  comment = "This paper contains results of an experiment for comparison of a few binding site surfaces to a large database (almost all binding site surfaces in the PDB) using the method described in [Kinoshita03]. Since partial surfaces are matched, a similarity scoring method is introduced that considers both a normalized score for the match of the geometry and electrostatics (Z-score) and the ``coverage'' of the match (fractions of the surfaces found to be in correspondence). Results are presented for 18 hypothetical proteins.",
}

@Article{Kinoshita04,
  author = "K. Kinoshita and H. Nakamura",
  title = "{eF-site and PDBViewer}: database and viewer for protein functional sites",
  journal = "Bioinformatics",
  year = "2004",
  volume = "20",
  pages = "1329--1330",
  comment = "http://ef-site.hgc.jp/eF-site/",
}

@Article{Kinoshita03,
  author = "K. Kinoshita and H. Nakamura",
  title = "Identification of protein biochemical functions by similarity search using the molecular surface database {eF-site}",
  journal = "Protein Science",
  volume = "12",
  year = "2003",
  pages = "1589--1595",
  comment = "This paper describes matching of surfaces stored in the eF-site database of binding sites. Each binding site is represented by a mesh with electrostatic potential and the 2 principal curvatures at every vertex. The meshes are matched using association graphs, where the electrostatic potentials and principal curvatures have to match within some threshold, as well as the intramolecular distances. No reduction of the point set is performed (e.g., using critical points).
Results are shown for matching examples of two SCOP folds and for predicting the biochemical function of one hypothetical protein.",
}

@Article{Kinoshita02,
  author = "K. Kinoshita and J. Furui and H. Nakamura",
  title = "Identification of Protein Functions from a Molecular Surface Database, {eF}-site",
  year = "2002",
  journal = "J. Struct. Func. Genomics",
  volume = "2",
  number = 1,
  pages = "9--22",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with surfaces and matching with geometric hashing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Lin94,
  author = "S.L. Lin and R. Nussinov and D. Fischer and H.J. Wolfson",
  title = "Molecular-Surface Representations By Sparse Critical-Points",
  journal = "Proteins: Structure, Function, and Genetics",
  volume = "18",
  year = "1994",
  pages = "94--101",
  comment = "This paper describes a surface representation consisting of ``a limited number of critical points disposed at key locations over the surface. These points adequately represent the shape and the important characteristics of the surface, despite the fact that they are modest in number.'' Using this representation, they investigate protein-protein and protein-small molecule docking.",
}

@Article{Rosen98,
  author = "M. Rosen and S.L. Lin and H. Wolfson and R. Nussinov",
  title = "Molecular shape comparisons in searches for active sites and functional similarity",
  journal = "Protein Engineering",
  volume = "11",
  year = "1998",
  pages = "263--277",
  comment = "This paper uses geometric hashing to examine ``the reliability of surface comparisons in searches for active sites in proteins. Specifically, we compare the efficacy of molecular surface comparisons with comparisons of surface atoms and of C(alpha) backbone atoms.
We further investigate comparisons of specific atoms, belonging to a predefined pattern of catalytic residues versus comparisons of molecular surfaces and, separately, of surface atoms. We also explore active site comparisons versus comparisons in which the entire molecular surfaces are scanned. While here we focus on the geometrical aspect of the problem, we also investigate the effect of adding residue labels in these comparisons. Our extensive studies cover the serine proteases, containing the highly conserved triad motif, and the chorismate mutases. Our results show that molecular surface comparisons work best when the similarity is high. As the similarity deteriorates, the number of potential solutions increases rapidly, making their ranking difficult, particularly when scanning entire molecular surfaces. Utilizing atomic coordinates directly appears more adequate under such circumstances.''",
}

@Article{Fischer93,
  author = "D. Fischer and R. Norel and H. Wolfson and R. Nussinov",
  title = "Surface motifs by a computer vision technique: searches, detection, and implications for protein-ligand recognition",
  journal = "Proteins",
  volume = "16",
  year = "1993",
  pages = "278--292",
  comment = "This paper uses geometric hashing to perform ``4 types of comparisons between pairs of molecules: (1) comparison of the backbones of two protein domains; (2) search for a predefined 3-D C alpha motif within the full backbone of a domain; and in particular, (3) comparison of the surfaces of two receptor proteins; and (4) comparison of the surface of a receptor to the surface of a ligand. ... Searches for 3-D surface motifs can be carried out on either receptors or on ligands.''",
}

@Article{Bachar93,
  author = "O. Bachar and D. Fischer and R. Nussinov and H.
Wolfson",
  title = "A Computer Vision-Based Technique For {3-D} Sequence-Independent Structural Comparison Of Proteins",
  journal = "Protein Engineering",
  volume = "6",
  year = "1993",
  pages = "279--288",
  comment = "Uses geometric hashing",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with surfaces and matching with genetic algorithms
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Poirrette97,
  author = "A.R. Poirrette and P.J. Artymiuk and D.W. Rice and P. Willett",
  title = "Comparison of protein surfaces using a genetic algorithm",
  journal = "Journal of Computer-Aided Molecular Design",
  volume = "11",
  year = "1997",
  pages = "557--569",
  comment = "``A genetic algorithm (GA) is described which is used to compare the solvent-accessible surfaces of two proteins or fragments of proteins, represented by a dot surface calculated using the Connolly algorithm. The GA is used to move one surface relative to the other to locate the most similar surface region between the two. The matching process is enhanced by the use of the surface normals and shape terms provided by the Connolly program and also by a simple hydrogen-bonding descriptor and an additional shape descriptor. The algorithm has been tested in applications ranging from the comparison of small surface patches to the comparison of whole protein surfaces. Examples of the matches are given and a quantitative analysis of the quality of the matches is performed.''",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with radial extents and matching with spherical harmonic surfaces
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Kahraman07,
  author = "A. Kahraman and R.J. Morris and R. Laskowski and J.M. Thornton",
  title = "Shape Variation in Protein Binding Pockets and their Ligands",
  journal = "J. Mol.
Biol.",
  year = "2007",
  volume = "368",
  pages = "283--301",
  comment = "A common assumption about the shape of protein binding pockets is that they are related to the shape of the small ligand molecules that can bind there. But to what extent is that assumption true? Here we use a recently developed shape matching method to compare the shapes of protein binding pockets to the shapes of their ligands. We find that pockets binding the same ligand show greater variation in their shapes than can be accounted for by the conformational variability of the ligand. This suggests that geometrical complementarity in general is not sufficient to drive molecular recognition. Nevertheless, we show when considering only shape and size that a significant proportion of the recognition power of a binding pocket for its ligand resides in its shape. Additionally, we observe a buffer zone or a region of free space between the ligand and protein, which results in binding pockets being on average three times larger than the ligand that they bind.",
}

@Article{Morris05a,
  author = "R.J. Morris and R.J. Najmanovich and A. Kahraman and J.M. Thornton",
  title = "Real spherical harmonic expansion coefficients as {3D} shape descriptors for protein binding pocket and ligand comparisons",
  journal = "Bioinformatics",
  volume = "21",
  number = "10",
  year = "2005",
  pages = "2347--2355",
  comment = "This paper ``uses the coefficients of a real spherical harmonics expansion to describe the shape of a protein's binding pocket.'' Binding sites are represented by the radial extent of surfnet spheres within 3.5 angstroms of a conserved residue. The resulting spherical functions are aligned with PCA, only the highest order spherical harmonic coefficients are retained, and shape similarity is computed as the L2 distance between corresponding spherical harmonic coefficients.",
}

@InProceedings{Morris05b,
  author = "R.J. Morris and A.
Kahraman and T. Funkhouser and R. Najmanovich and G. Stockwell and F. Glaser and R. Laskowski and J.M. Thornton",
  title = "Binding Pocket Shape Analysis for Protein Function Prediction",
  booktitle = "{LASR} Workshop on Quantitative Biology, Shape Analysis, and Wavelets",
  location = "Leeds England",
  month = jun,
  year = "2005",
}

@Article{Cai02,
  author = "W. Cai and X. Shao and B. Maigret",
  title = "Protein-ligand recognition using spherical harmonic molecular surfaces: towards a fast and efficient filter for large virtual throughput screening",
  journal = "Journal of Molecular Graphics and Modeling",
  volume = "20",
  pages = "313--328",
  year = "2002",
  comment = "``In this paper, we present an extension of our work to spherical harmonic surfaces in order to approximate molecular surfaces of both ligands and receptor-cavities and to easily check the surface-shape complementarity. The method consists of (1) finding lobes and holes on both ligand and cavity surfaces using contour maps of radius functions with spherical harmonic expansions, (2) superposing the surfaces around a given binding site by minimizing the distance between their respective expansion coefficients. This docking procedure capabilities was demonstrated by application to 35 protein-ligand complexes of known crystal structures.''",
}

@Article{Cai98,
  author = "W. Cai and M. Zhang and B. Maigret",
  title = "New approach for representation of molecular surface",
  journal = "J. Comput. Chem",
  volume = "19",
  year = "1998",
  pages = "1805--1815",
}

@Article{Ritchie99,
  author = "D.W. Ritchie and G.J.L. Kemp",
  title = "Fast computation, rotation, and comparison of low resolution spherical harmonic molecular surfaces",
  journal = "J. Comput. Chem",
  volume = "20",
  year = "1999",
  pages = "383--395",
  comment = "Describes Fourier search algorithm for optimal rotational alignment",
}

@Article{Duncan93a,
  author = "B.S. Duncan and A.J.
Olson",
  title = "Shape analysis of molecular surfaces",
  journal = "Biopolymers",
  volume = "33",
  year = "1993",
  pages = "231--238",
}

@Article{Duncan93b,
  author = "B.S. Duncan and A.J. Olson",
  title = "Approximation and characterization of molecular surfaces",
  journal = "Biopolymers",
  volume = "33",
  year = "1993",
  pages = "219--229",
}

@Article{Leicester88,
  author = "S. Leicester and J.L. Finney and R.P. Bywater",
  title = "Description of molecular surface shape using {F}ourier descriptors",
  journal = "J. Mol. Graph",
  volume = "6",
  year = "1988",
  pages = "104--108",
}

@Article{Max88,
  author = "N.L. Max and E.D. Getzoff",
  title = "Spherical harmonic molecular surfaces",
  journal = "IEEE Comput. Graph. Appl",
  volume = "8",
  year = "1988",
  pages = "42--50",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations and matching with other methods
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Goldman00,
  author = "B.B. Goldman and W.T. Wipke",
  title = "{Quadratic Shape Descriptors} 1. Rapid Superposition of Dissimilar Molecules Using Geometrically Invariant Surface Descriptors",
  journal = "J. Chem. Inf. Comput. Sci.",
  volume = "40",
  number = "3",
  year = "2000",
  pages = "644--658",
}

@InProceedings{Funkhouser05b,
  author = "T. Funkhouser and F. Glaser and R. Laskowski and R. Morris and R. Najmanovich and G. Stockwell and J. Thornton",
  title = "Shape-Based Classification of Bound Ligands",
  booktitle = "{LASR} Workshop on Quantitative Biology, Shape Analysis, and Wavelets",
  location = "Leeds England",
  month = jun,
  year = "2005",
}

@Article{Exner02a,
  author = "T.E. Exner and M. Keil and J. Brickmann",
  title = "Pattern recognition strategies for molecular surfaces. {I.} Pattern generation using fuzzy set theory",
  journal = "Journal of Computational Chemistry",
  volume = "23",
  year = "2002",
  pages = "1176--1187",
}

@Article{Exner02b,
  author = "T.E. Exner and M. Keil and J.
Brickmann",
  title = "Pattern recognition strategies for molecular surfaces. {II.} Surface complementarity",
  journal = "Journal of Computational Chemistry",
  volume = "23",
  year = "2002",
  pages = "1188--1197",
  comment = "``Fuzzy logic based algorithms for the quantitative treatment of complementarity of molecular surfaces are presented. ... The algorithms are applied to 33 biomolecular complexes. ... After the optimization with a downhill simplex method, for all these complexes one structure was found, which is in very good agreement with the experimental results.''",
}

@InProceedings{Gu03,
  author = "X. Gu and {S.-T.} Yau",
  title = "Surface Classification Using Conformal Structures",
  booktitle = "Ninth {IEEE} International Conference on Computer Vision ({ICCV'03})",
  volume = "1",
  year = "2003",
  pages = "701",
  comment = "This paper provides a way to map a surface from 3D to 2D (flatten it) while retaining the angles between edges of the mesh as best as possible (a conformal map). The surfaces are compared/classified in the 2D domain.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with alpha-shapes
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Liang98a,
  author = "J. Liang and H. Edelsbrunner and P. Fu and P.V. Sudhakar and S. Subramaniam",
  year = "1998",
  title = "Analytical shape computing of macromolecules I: molecular area and volume through alpha shape",
  journal = "Proteins",
  volume = "33",
  pages = "1--17",
}

@Article{Liang98b,
  author = "J. Liang and H. Edelsbrunner and P. Fu and P.V. Sudhakar and S. Subramaniam",
  title = "Analytical shape computing of macromolecules II: identification and computation of inaccessible cavities inside proteins",
  journal = "Proteins",
  volume = "33",
  year = "1998",
  pages = "18--29",
}

@Article{Liang98c,
  author = "J. Liang and H. Edelsbrunner and C.
Woodward",
  year = "1998",
  title = "Anatomy of protein pockets and cavities: Measurement of binding site geometry and implications for ligand design",
  journal = "Protein Science",
  volume = "7",
  pages = "1884--1897",
}

@Article{Edelsbrunner98,
  author = "H. Edelsbrunner and M. Facello and J. Liang",
  title = "On the definition and the construction of pockets in macromolecules",
  journal = "Disc. Appl. Math",
  volume = "88",
  pages = "83--102",
  year = "1998",
}

@InProceedings{Edelsbrunner95,
  author = "H. Edelsbrunner and M. Facello and P. Fu and J. Liang",
  title = "Measuring Proteins and Voids in Proteins",
  booktitle = "Proceedings of the 28th Annual Hawaii International Conference on System Sciences",
  year = "1995",
  pages = "256--264",
}

@Article{Binkowski03b,
  author = "T.A. Binkowski and S. Naghibzadeh and J. Liang",
  title = "{CASTp:} {C}omputed {A}tlas of {S}urface {T}opography of {p}roteins",
  journal = "Nucleic Acids Res",
  year = "2003",
  volume = "31",
  pages = "3352--3355",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representation with grids and matching with correlation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Katchalski-Katzir92,
  author = "E. Katchalski-Katzir and I. Shariv and M. Eisenstein and A.A. Friesem and C. Aflalo and I.A. Vakser",
  title = "Molecular surface recognition: determination of geometric fit between proteins and their ligands by correlation techniques",
  journal = "Proc. Natl. Acad. Sci. U.S.A",
  volume = "89",
  year = "1992",
  pages = "2195--2199",
  comment = "Rasterizes molecules into grid. Discretely samples rotations.
Uses correlation in Fourier domain to search for best translation.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Representation of binding sites with flexible structures
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Pitman01,
  title = "{FLASHFLOOD:} A {3D} field-based similarity search and alignment method for flexible molecules",
  author = "M.C. Pitman and W.K. Huber and H. Horn and A. Kramer and J.E. Rice and W.C. Swope",
  journal = "J Comput Aided Mol Des",
  year = "2001",
  month = jul,
  volume = "15",
  number = "7",
  pages = "587--612",
  comment = "This is Wolfgang's paper",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site mapping with probes
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Goodford85,
  author = "P.J. Goodford",
  title = "A computational procedure for determining energetically favorable binding sites on biologically important macromolecules",
  journal = "J. Med. Chem.",
  volume = "28",
  year = "1985",
  pages = "849--857",
  comment = "Uses GRID",
}

@Article{Kastenholz00,
  author = "M.A. Kastenholz and M. Pastor and G. Cruciani and E.E. Haaksma and T. Fox",
  title = "{GRID/CPCA:} a new computational tool to design selective ligands",
  journal = "J Med Chem",
  volume = "43",
  year = "2000",
  pages = "3033--3044",
  comment = "Uses GRID to understand similarities/differences between binding sites",
}

@Article{Reynolds89,
  author = "C.A. Reynolds and R.C. Wade and P.J. Goodford",
  title = "Identifying targets for bioreductive agents: using {GRID} to predict selective binding regions of proteins",
  journal = "J Mol Graph.",
  volume = "7",
  number = "2",
  month = jun,
  year = "1989",
  pages = "103--108",
}

@Article{Ruppert97,
  author = "J. Ruppert and W. Welch and A.
Jain",
  title = "Automatic identification and representation of protein binding sites for molecular docking",
  journal = "Protein Science",
  volume = "6",
  year = "1997",
  pages = "524--533",
  comment = "This paper presents an algorithm for representing a protein's binding site in a way that is specifically suited to molecular docking applications. Initially the protein's surface is coated with a collection of molecular fragments that could potentially interact with the protein. Each fragment, or probe, serves as a potential alignment point for atoms in a ligand, and is scored to represent that probe's affinity for the protein. Probes are then clustered by accumulating their affinities, where high affinity clusters are identified as being the ``stickiest'' portions of the protein surface. The stickiest cluster is used as a computational binding ``pocket'' for docking.",
}

@Article{Pastor97,
  author = "Manuel Pastor and Gabriele Cruciani and Kimberly A. Watson",
  title = "A Strategy for the Incorporation of Water Molecules Present in a Ligand Binding Site into a Three-Dimensional Quantitative Structure-Activity Relationship Analysis",
  journal = "J. Med. Chem",
  year = "1997",
  volume = "40",
  number = "25",
  pages = "4089--4102",
  comment = "Uses GRID descriptors input in statistical procedures like CoMFA, GOLPE or SIMCA for QSAR or 3D-QSAR analyses",
}

@Misc{GRID,
  author = {Molecular Discovery},
  title = "GRID",
  howpublished = {http://www.moldiscovery.com/soft\_grid.php},
  year = "2005",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site mapping with multiple copy simultaneous search (MCSS)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Miranker91,
  author = "A. Miranker and M.
Karplus",
  title = "Functionality maps of binding sites: a multiple copy simultaneous search method",
  journal = "Proteins",
  volume = "11",
  year = "1991",
  pages = "29--34",
  comment = "This is the main reference for the multiple copy simultaneous search (MCSS) method. ``A new method is proposed for determining energetically favorable positions and orientations for functional groups on the surface of proteins with known three-dimensional structure. From 1,000 to 5,000 copies of a functional group are randomly placed in the site and subjected to simultaneous energy minimization and/or quenched molecular dynamics. The resulting functionality maps of a protein receptor site, which can take account of its flexibility, can be used for the analysis of protein ligand interactions and rational drug design. Application of the method to the sialic acid binding site of the influenza coat protein, hemagglutinin, yields functional group minima that correspond with those of the ligand in a cocrystal structure.''",
}

@Article{Kortvelyesi03,
  author = "T. Kortvelyesi and M. Silberstein and S. Dennis and S. Vajda",
  title = "Improved mapping of protein binding sites",
  journal = "J Comput Aided Mol Des",
  volume = "17",
  year = "2003",
  pages = "173--186",
}

@Article{Mattos96,
  author = "Carla Mattos and Dagmar Ringe",
  title = "Locating and characterizing binding sites on proteins",
  journal = "Nature Biotechnology",
  volume = "14",
  year = "1996",
  pages = "595--599",
  comment = "``This review article begins with a discussion of fundamental differences between substrates and inhibitors, and some of the assumptions and goals underlying the design of a new ligand to a target protein. An overview is given of the methods currently used to locate and characterize ligand binding sites on protein surfaces, with focus on a novel approach: multiple solvent crystal structures (MSCS). In this method, the X-ray crystal structure of the target protein is solved in a variety of organic solvents.
Each type of solvent molecule serves as a probe for complementary binding sites on the protein. The probe distribution on the protein surface allows the location of binding sites and the characterization of the potential ligand interactions within these sites. General aspects of the application of the MSCS method to porcine pancreatic elastase is discussed, and comparison of the results with those from X-ray crystal structures of elastase/inhibitor complexes is used to illustrate the potential of the method in aiding the process of rational drug design.''",
}

@Article{Stultz99,
  author = "C.M. Stultz and Martin Karplus",
  title = "{MCSS} Functionality Maps for a Flexible Protein",
  journal = "Proteins: Structure, Function, and Genetics",
  volume = "37",
  year = "1999",
  pages = "512--529",
}

@Book{Evensen97,
  author = "E. Evensen and D. {Joseph-McCarthy} and M. Karplus",
  title = "{MCSS} version 2.1",
  publisher = "Harvard University",
  location = "Cambridge, MA USA",
  year = "1997",
}

@Article{Caflisch93,
  author = "A. Caflisch and A. Miranker and M. Karplus",
  title = "Multiple copy simultaneous search and construction of ligands in binding sites: application to inhibitors of {HIV-1} aspartic proteinase",
  journal = "J. Med. Chem.",
  volume = "36",
  pages = "2142--2167",
  year = "1993",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site mapping with knowledge-based algorithms
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Evers03,
  author = "A. Evers and H. Gohlke and G. Klebe",
  title = "Ligand-supported Homology Modelling of Protein Binding-sites using Knowledge-based Potentials",
  journal = "J. Mol. Biol.",
  year = "2003",
  volume = "334",
  pages = "327--345",
  abstract = "A new approach, MOBILE, is presented that models protein binding-sites including bound ligand molecules as restraints.
Initially generated, homology models of the target protein are refined iteratively by including information about bioactive ligands as spatial restraints and optimising the mutual interactions between the ligands and the binding-sites. Thus optimised models can be used for structure-based drug design and virtual screening. In a first step, ligands are docked into an averaged ensemble of crude homology models of the target protein. In the next step, improved homology models are generated, considering explicitly the previously placed ligands by defining restraints between protein and ligand atoms. These restraints are expressed in terms of knowledge-based distance-dependent pair potentials, which were compiled from crystallographically determined protein-ligand complexes. Subsequently, the most favourable models are selected by ranking the interactions between the ligands and the generated pockets using these potentials. Final models are obtained by selecting the best-ranked side-chain conformers from various models, followed by an energy optimisation of the entire complex using a common force-field. Application of the knowledge-based pair potentials proved efficient to restrain the homology modelling process and to score and optimise the modelled protein-ligand complexes. For a test set of 46 protein-ligand complexes, taken from the Protein Data Bank (PDB), the success rate of producing near-native binding-site geometries (rmsd $<$ 2.0 {\AA}) with MODELLER is 70\% when the ligand restrains the homology modelling process in its native orientation. Scoring these complexes with the knowledge-based potentials, in 66\% of the cases a pose with rmsd $<$ 2.0 {\AA} is found on rank 1. Finally, MOBILE has been applied to two case studies modelling factor Xa based on trypsin and aldose reductase based on aldehyde reductase.",
}

@Article{Sotriffer02a,
  author = "C. Sotriffer and G.
Klebe",
  title = "Identification and mapping of small-molecule binding sites in proteins: computational tools for structure-based drug design",
  journal = "Farmaco",
  volume = "57",
  year = "2002",
  pages = "243--251",
}

@Article{Verdonk01,
  author = "M.L. Verdonk and J.C. Cole and P. Watson and V. Gillet and P. Willett",
  title = "{SuperStar}: improved knowledge-based interaction fields for protein binding sites",
  journal = "Journal of Molecular Biology",
  volume = "307",
  number = "3",
  month = mar,
  year = "2001",
  pages = "841--859",
}

@Article{Laskowski96,
  author = "R.A. Laskowski and J.M. Thornton and C. Humblet and J. Singh",
  title = "{X-SITE:} use of empirically derived atomic packing preferences to identify favourable interaction regions in the binding sites of proteins",
  journal = "J. Mol. Biol",
  volume = "259",
  pages = "175--201",
  year = "1996",
  comment = "This is the main reference for XSITE",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand binding site representations with strings
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Karlin96,
  author = "S. Karlin and Z.Y. Zhu",
  title = "Characterizations of diverse residue clusters in protein three-dimensional structures",
  journal = "Proc Natl Acad Sci U S A",
  volume = "93",
  year = "1996",
  pages = "8344--8349",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking overviews
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Halperin02,
  author = "I. Halperin and B. Ma and Haim Wolfson and Ruth Nussinov",
  title = "Principles of Docking: An Overview of Search Algorithms and a Guide to Scoring Functions",
  journal = "Proteins: Structure, Function, and Genetics",
  volume = "47",
  pages = "409--443",
  year = "2002",
}

@Article{Taylor02b,
  author = "R.D. Taylor and P.J. Jewsbury and J.W.
Essex",
  title     = "A review of protein-small molecule docking methods",
  journal   = "Journal of Computer-Aided Molecular Design",
  volume    = "16",
  pages     = "151--166",
  year      = "2002",
}

@Article{Krovat05,
  author    = "E.M. Krovat and T. Steindl and T. Langer",
  title     = "Recent Advances in Docking and Scoring",
  journal   = "Current Computer-Aided Drug Design",
  volume    = "1",
  number    = "1",
  year      = "2005",
  pages     = "93--102",
  abstract  = "This review is focused on recent advances and new aspects in the field of molecular docking and scoring, and it covers multiple applications and case studies. Basic requirements and different algorithms for docking are briefly discussed. Moreover, parameters that influence docking results, combination of different docking algorithms and scoring functions, performance of scoring functions, docking using homology models, and ligand and protein flexibility are examined to give an overview of the state-of-the-art methods and a survey of innovative approaches in molecular docking and scoring. Regarding the enormous amount of literature in this field we restrict ourselves on an overview of several important advances in docking and scoring techniques published within the last two years, i.e. we considered publications ranging from 2002 to 2004.",
}

@Article{Brooijmans03,
  author    = "N. Brooijmans and I.D. Kuntz",
  title     = "Molecular recognition and docking algorithms",
  journal   = "Annu Rev Biophys Biomol Struct",
  volume    = "32",
  year      = "2003",
  pages     = "335--373",
  abstract  = "Molecular docking is an invaluable tool in modern drug discovery. This review focuses on methodological developments relevant to the field of molecular docking. The forces important in molecular recognition are reviewed and followed by a discussion of how different scoring functions account for these forces. More recent applications of computational chemistry tools involve library design and database screening. 
Last, we summarize several critical methodological issues that must be addressed in future developments.",
}

@Article{Kroemer03,
  author    = "R.T. Kroemer",
  title     = "Molecular modelling probes: docking and scoring",
  journal   = "Biochemical Society Transactions",
  year      = "2003",
  volume    = "31",
  number    = "5",
  pages     = "980--984",
  abstract  = "A general introduction to molecular modelling techniques in the area of protein-ligand interactions is given. Methods covered range from binding-site analysis to statistical treatment of sets of ligands. The main focus of this paper is on docking and scoring. After an outline of the main concepts, two specific application examples are given.",
}

@Article{Kitchen04,
  author    = "D.B. Kitchen and H. Decornez and J.R. Furr and J. Bajorath",
  title     = "Docking and scoring in virtual screening for drug discovery: methods and applications",
  journal   = "Nature Rev. Drug Discov.",
  volume    = "3",
  number    = "11",
  pages     = "935--949",
  year      = "2004",
  abstract  = "Computational approaches that 'dock' small molecules into the structures of macromolecular targets and 'score' their potential complementarity to binding sites are widely used in hit identification and lead optimization. Indeed, there are now a number of drugs whose development was heavily influenced by or based on structure-based design and screening strategies, such as HIV protease inhibitors. Nevertheless, there remain significant challenges in the application of these approaches, in particular in relation to current scoring schemes. 
Here, we review key concepts and specific features of small-molecule-protein docking methods, highlight selected applications and discuss recent advances that aim to address the acknowledged limitations of established approaches.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking with Monte Carlo simulated annealing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Friesner04,
  author    = "R.A. Friesner and J.L. Banks and R.B. Murphy and T.A. Halgren and J.J. Klicic and D.T. Mainz and M.P. Repasky and E.H. Knoll and M. Shelley and J.K. Perry and D.E. Shaw and P. Francis and P.S. Shenkin",
  title     = "Glide: A New Approach for Rapid, Accurate Docking and Scoring. 1. {Method} and Assessment of Docking Accuracy",
  journal   = "J. Med. Chem.",
  year      = "2004",
  volume    = "47",
  pages     = "1739--1749",
  comment   = "This is the main reference for GLIDE",
}

@Article{Liu99,
  author    = "M. Liu and S. Wang",
  title     = "{MCDOCK:} A Monte Carlo simulation approach to the molecular docking problem",
  journal   = "Journal of Computer-Aided Molecular Design",
  volume    = "13",
  number    = "5",
  year      = "1999",
  pages     = "435--451",
  comment   = "This is the main reference for MCDock.",
}

@Article{Goodsell90,
  author    = "D.S. Goodsell and A.J. Olson",
  title     = "Automated Docking of Substrates to Proteins by Simulated Annealing",
  journal   = "Proteins: Str. Func. and Genet.",
  volume    = "8",
  year      = "1990",
  pages     = "195--202",
  comment   = "This is the main reference for AutoDock 1.0.",
}

@Article{Mcmartin97,
  author    = "C. Mcmartin and R.S. 
Bohacek",
  title     = "{QXP:} Powerful, rapid computer algorithms for structure-based drug design",
  journal   = "Journal of Computer-Aided Molecular Design",
  volume    = "11",
  number    = "4",
  year      = "1997",
  pages     = "333--344",
  comment   = "This is the main reference for QXP.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking with genetic algorithms
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Jones97,
  author    = "G. Jones and P. Willett and R.C. Glen and A.R. Leach and R. Taylor",
  title     = "Development and Validation of a Genetic Algorithm for Flexible Docking",
  journal   = "J. Mol. Biol.",
  volume    = "267",
  year      = "1997",
  pages     = "727--748",
  comment   = "This is the main reference for GOLD.",
}

@Article{Verdonk03,
  author    = "M.L. Verdonk and J.C. Cole and M.J. Hartshorn and C.W. Murray and R. D. Taylor",
  title     = "Improved Protein-Ligand Docking Using GOLD",
  journal   = "Proteins",
  volume    = "52",
  year      = "2003",
  pages     = "609--623",
  comment   = "This is a more recent paper about GOLD.",
}

@Article{Morris98,
  author    = "G.M. Morris and D.S. Goodsell and R.S. Halliday and R. Huey and W.E. Hart and R.K. Belew and A.J. Olson",
  title     = "Automated Docking Using a {Lamarckian} Genetic Algorithm and an Empirical Binding Free Energy Function",
  journal   = "J. Computational Chemistry",
  volume    = "19",
  year      = "1998",
  pages     = "1639--1662",
  comment   = "This is the main reference for AutoDock 3.0 (http://www.scripps.edu/mb/olson/dock/autodock/).",
}

@Article{Oshiro95,
  author    = "C.M. Oshiro and I.D. Kuntz",
  title     = "Flexible ligand docking using a genetic algorithm",
  journal   = "J. Comput-Aided Mol. Design",
  volume    = "9",
  pages     = "113--130",
  year      = "1995",
  comment   = "DOCK",
}

@Article{Yang04,
  author    = "J.M. Yang and C.C. 
Chen",
  title     = "{GEMDOCK:} A generic evolutionary method for molecular docking",
  journal   = "Proteins: Structure, Function, and Bioinformatics",
  volume    = "55",
  year      = "2004",
  pages     = "288--304",
  comment   = "This is the main reference for GemDock.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking with incremental construction
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Rarey96,
  author    = "M. Rarey and B. Kramer and T. Lengauer and G. Klebe",
  year      = "1996",
  title     = "A Fast Flexible Docking Method using an Incremental Construction Algorithm",
  journal   = "Journal of Molecular Biology",
  volume    = "261",
  number    = "3",
  pages     = "470--489",
  comment   = "This is the main reference for FlexX (http://www.biosolveit.de/FlexX/).",
}

@Article{Zavodsky02,
  author    = "M.I. Zavodszky and P.C. Sanschagrin and R.S. Korde and L.A. Kuhn",
  title     = "Distilling the essential features of a protein surface for improving protein-ligand docking, scoring, and virtual screening",
  journal   = "J. Comput. Aided Mol. Des.",
  volume    = "16",
  year      = "2002",
  pages     = "883--902",
  comment   = "This is the main reference for SLIDE.",
}

@Article{Jain03,
  author    = "A.N. 
Jain",
  title     = "Surflex: fully automatic flexible molecular docking using a molecular similarity-based search engine",
  journal   = "J Med Chem",
  volume    = "46",
  year      = "2003",
  pages     = "499--511",
  comment   = "This is the main reference for Surflex.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking with systematic search
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Misc{FRED,
  author       = "{Open Eye Scientific Software}",
  title        = "{FRED:} Fast Rigid Exhaustive Docking",
  howpublished = "http://www.eyesopen.com/docs/html/fred/",
  year         = "2005",
  comment      = "This is the main reference for FRED, which docks ligands in proteins using precomputed ligand conformations and systematic search over translations and rotations.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking with tabu search methods
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Baxter97,
  author    = "C.A. Baxter and C.W. Murray and D.E. Clark and D.R. Westhead and M.D. Eldridge",
  title     = "Flexible docking using TABU search and an empirical estimate of binding affinity",
  journal   = "Proteins",
  volume    = "33",
  year      = "1997",
  pages     = "367--382",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking with multiconformers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{McGann03,
  author    = "M. McGann and H. Almond and A. Nicholls and J.A. Grant and F. Brown",
  title     = "Gaussian Docking Functions",
  journal   = "Biopolymers",
  volume    = "68",
  year      = "2003",
  pages     = "76--90",
  comment   = "FRED",
}

@InProceedings{Choi05,
  author    = "V. 
Choi",
  title     = "Yucca: An Efficient Algorithm for Small Molecule Docking",
  booktitle = "Algorithms in Molecular Biology ({AlgBio2005})",
  year      = "2005",
  note      = "To appear",
  comment   = "This is the main reference for Yucca.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking by consensus
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Paul02,
  author    = "N. Paul and D. Rognan",
  title     = "{ConsDock:} A new program for the consensus analysis of protein-ligand interactions",
  journal   = "Proteins",
  volume    = "47",
  number    = "4",
  year      = "2002",
  pages     = "521--533",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking with ...
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Kuntz82,
  author    = "I.D. Kuntz and J.M. Blaney and S.J. Oatley and R. Langridge and T.E. Ferrin",
  title     = "A geometric approach to macromolecule-ligand interactions",
  journal   = "J. Mol. Biol.",
  volume    = "161",
  year      = "1982",
  pages     = "269--288",
  comment   = "This is the main reference for the first docking program (Dock 1.0).",
}

@Article{Jackson02,
  author    = "R.M. Jackson",
  title     = "{Q-fit:} a probabilistic method for docking molecular fragments by sampling low energy conformational space",
  journal   = "J Comput Aided Mol Des",
  volume    = "16",
  pages     = "43--57",
  year      = "2002",
}

@Article{Welch96,
  author    = "W. Welch and J. Ruppert and A.N. Jain",
  title     = "Hammerhead: fast, fully automated docking of flexible ligands to protein binding sites",
  journal   = "Chemistry \& Biology",
  volume    = "3",
  number    = "6",
  year      = "1996",
  pages     = "449--462",
  comment   = "This is the main reference for Hammerhead.",
  abstract  = "Background: Molecular docking seeks to predict the geometry and affinity of the binding of a small molecule to a given protein of known structure. 
Rigid docking has long been used to screen databases of small molecules, because docking techniques that account for ligand flexibility have either been too slow or have required significant human intervention. Here we describe a docking algorithm, Hammerhead, which is a fast, automated tool to screen for the binding of flexible molecules to protein binding sites. Results: We used Hammerhead to successfully dock a variety of positive control ligands into their cognate proteins. The empirically tuned scoring function of the algorithm predicted binding affinities within 1.3 log units of the known affinities for these ligands. Conformations and alignments close to those determined crystallographically received the highest scores. We screened 80 000 compounds for binding to streptavidin, and biotin was predicted as the top-scoring ligand, with other known ligands included among the highest-scoring dockings. The screen ran in a few days on commonly available hardware. Conclusions: Hammerhead is suitable for screening large databases of flexible molecules for binding to a protein of known structure. It correctly docks a variety of known flexible ligands, and it spends an average of only a few seconds on each compound during a screen. The approach is completely automated, from the elucidation of protein binding sites, through the docking of molecules, to the final selection of compounds for assay.",
}

@Article{Abagyan94,
  author    = "R. Abagyan and M. Totrov and D. Kuznetsov",
  title     = "ICM - A new method for protein modeling and design: Applications to docking and structure prediction from the distorted native conformation",
  journal   = "Journal of Computational Chemistry",
  volume    = "15",
  number    = "5",
  year      = "1994",
  pages     = "488--506",
  comment   = "This is the main reference for ICM",
}

@Article{Schoichet92,
  author    = "B.K. Shoichet and D.L. Bodian and I.D. Kuntz",
  title     = "Molecular docking using shape descriptors",
  journal   = "J. Comp. 
Chem.",
  volume    = "13",
  number    = "3",
  year      = "1992",
  pages     = "380--397",
  comment   = "DOCK",
}

@Article{Meng92,
  author    = "E.C. Meng and B.K. Shoichet and I.D. Kuntz",
  title     = "Automated docking with grid-based energy evaluation",
  journal   = "J. Comp. Chem.",
  volume    = "13",
  year      = "1992",
  pages     = "505--524",
  comment   = "DOCK (http://www.cmpharm.ucsf.edu/kuntz/dockinfo.html)",
}

@Article{Meng93,
  author    = "E.C. Meng and D.A. Gschwend and J.M. Blaney and I.D. Kuntz",
  title     = "Orientational sampling and rigid-body minimization in molecular docking",
  journal   = "Proteins",
  volume    = "17",
  number    = "3",
  pages     = "266--278",
  year      = "1993",
  comment   = "DOCK",
}

@Article{Gschwend96,
  author    = "D.A. Gschwend and I.D. Kuntz",
  title     = "Orientational sampling and rigid-body minimization in molecular docking, revisited: On-the-fly optimization and degeneracy removal",
  journal   = "J. Comput-Aided Mol. Design",
  volume    = "10",
  number    = "2",
  year      = "1996",
  pages     = "123--132",
  comment   = "DOCK",
}

@Article{Shoichet93,
  author    = "B.K. Shoichet and I.D. Kuntz",
  title     = "Matching chemistry and shape in molecular docking",
  journal   = "Protein Engineering",
  volume    = "6",
  year      = "1993",
  pages     = "223--232",
  comment   = "DOCK",
}

@Article{Ewing01,
  author    = "T.J.A. Ewing and S. Makino and A.G. Skillman and I.D. Kuntz",
  title     = "{Dock 4.0:} Search strategies for automated molecular docking of flexible molecule databases",
  journal   = "J. Comp. Aided Mol. Design",
  volume    = "15",
  year      = "2001",
  pages     = "411--428",
  comment   = "This is the main reference for Dock 4.0.",
}

@Article{Roche01,
  author    = "O. Roche and R. Kiyama and C.L. {Brooks, III}",
  title     = "Ligand-protein database: Linking protein-ligand complex structures to binding data",
  journal   = "J. Med. Chem.",
  volume    = "44",
  year      = "2001",
  pages     = "3592--3598",
}

@Article{Marai04,
  author    = "H.A. Carlson and J.A. McCammon",
  title     = "Accommodating Protein Flexibility in Computational Drug Design",
  journal   = "Mol Pharmacol",
  volume    = "57",
  number    = "2",
  year      = "2000",
  pages     = "213--218",
  comment   = "Citation key kept unchanged so existing citations still resolve. Author and year corrected to match Mol Pharmacol 57(2):213-218; the entry previously listed C. Marai and 2004.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein-ligand docking evaluations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Kellenberger04,
  author    = "E. Kellenberger and J. Rodrigo and P. Muller and D. Rognan",
  title     = "Comparative evaluation of eight docking tools for docking and virtual screening accuracy",
  journal   = "Proteins",
  volume    = "57",
  number    = "2",
  year      = "2004",
  pages     = "225--242",
}

@Article{Kontoyianni04a,
  author    = "M. Kontoyianni and L.M. McClellan and G.S. Sokol",
  title     = "Evaluation of Docking Performance: Comparative Data on Docking Algorithms",
  journal   = "J Med Chem",
  volume    = "47",
  number    = "3",
  year      = "2004",
  pages     = "558--565",
  abstract  = "Docking molecules into their respective 3D macromolecular targets is a widely used method for lead optimization. However, the best known docking algorithms often fail to position the ligand in an orientation close to the experimental binding mode. It was reported recently that consensus scoring enhances the hit rates in a virtual screening experiment. This methodology focused on the top-ranked pose, with the underlying assumption that the orientation/conformation of the docked compound is the most accurate. In an effort to eliminate the scoring function bias, and assess the ability of the docking algorithms to provide solutions similar to the crystallographic modes, we investigated the most known docking programs and evaluated all of the resultant poses. We present the results of an extensive computational study in which five docking programs (FlexX, DOCK, GOLD, LigandFit, Glide) were investigated against 14 protein families (69 targets). 
Our findings show that some algorithms perform consistently better than others, and a correspondence between the nature of the active site and the best docking algorithm can be found.",
}

@Article{Kontoyianni04b,
  author    = "M. Kontoyianni and G.S. Sokol and L.M. McClellan",
  title     = "Evaluation of library ranking efficacy in virtual screening",
  journal   = "Journal of Computational Chemistry",
  volume    = "26",
  number    = "1",
  year      = "2004",
  pages     = "11--22",
  abstract  = "We present the results of a comprehensive study in which we explored how the docking procedure affects the performance of a virtual screening approach. We used four docking engines and applied 10 scoring functions to the top-ranked docking solutions of seeded databases against six target proteins. The scores of the experimental poses were placed within the total set to assess whether the scoring function required an accurate pose to provide the appropriate rank for the seeded compounds. This method allows a direct comparison of library ranking efficacy. Our results indicate that the LigandFit/Ligscore1 and LigandFit/GOLD docking/scoring combinations, and to a lesser degree FlexX/FlexX, Glide/Ligscore1, DOCK/PMF (Tripos implementation), LigandFit1/Ligscore2 and LigandFit/PMF (Tripos implementation) were able to retrieve the highest number of actives at a 10\% fraction of the database when all targets were looked upon collectively. We also show that the scoring functions rank the observed binding modes higher than the inaccurate poses provided that the experimental poses are available. This finding stresses the discriminatory ability of the scoring algorithms, when better poses are available, and suggests that the number of false positives can be lowered with conformers closer to bioactive ones.",
}

@Article{Perola04,
  author    = "E. Perola and W.P. Walters and P.S. 
Charifson",
  title     = "A detailed comparison of current docking and scoring methods on systems of pharmaceutical relevance",
  journal   = "Proteins",
  volume    = "56",
  number    = "2",
  year      = "2004",
  pages     = "235--249",
  abstract  = "A thorough evaluation of some of the most advanced docking and scoring methods currently available is described, and guidelines for the choice of an appropriate protocol for docking and virtual screening are defined. The generation of a large and highly curated test set of pharmaceutically relevant protein-ligand complexes with known binding affinities is described, and three highly regarded docking programs (Glide, GOLD, and ICM) are evaluated on the same set with respect to their ability to reproduce crystallographic binding orientations. Glide correctly identified the crystallographic pose within 2.0 {\AA} in 61\% of the cases, versus 48\% for GOLD and 45\% for ICM. In general Glide appears to perform most consistently with respect to diversity of binding sites and ligand flexibility, while the performance of ICM and GOLD is more binding site-dependent and it is significantly poorer when binding is predominantly driven by hydrophobic interactions. The results also show that energy minimization and reranking of the top N poses can be an effective means to overcome some of the limitations of a given docking function. The same docking programs are evaluated in conjunction with three different scoring functions for their ability to discriminate actives from inactives in virtual screening. The evaluation, performed on three different systems (HIV-1 protease, IMPDH, and p38 MAP kinase), confirms that the relative performance of different docking and scoring methods is to some extent binding site-dependent. 
GlideScore appears to be an effective scoring function for database screening, with consistent performance across several types of binding sites, while ChemScore appears to be most useful in sterically demanding sites since it is more forgiving of repulsive interactions. Energy minimization of docked poses can significantly improve the enrichments in systems with sterically demanding binding sites. Overall Glide appears to be a safe general choice for docking, while the choice of the best scoring tool remains to a larger extent system-dependent and should be evaluated on a case-by-case basis.",
}

@Article{Warren05,
  author    = "G.L. Warren and C.W. Andrews and A.M. Capelli and B. Clarke and J. LaLonde and M.H. Lambert and M. Lindvall and N. Nevins and S.F. Semus and S. Senger and G. Tedesco and I.D. Wall and J.M. Woolven and C.E. Peishoff and M.S. Head",
  title     = "A critical assessment of docking programs and scoring functions",
  journal   = "J. Med. Chem.",
  month     = aug,
  year      = "2005",
  doi       = "10.1021/jm050362n",
  note      = "ASAP Article",
  abstract  = "Docking is a computational technique that samples conformations of small molecules in protein binding sites; scoring functions are used to assess which of these conformations best complements the protein binding site. An evaluation of 10 docking programs and 37 scoring functions was conducted against eight proteins of seven protein types for three tasks: binding mode prediction, virtual screening for lead identification, and rank-ordering by affinity for lead optimization. All of the docking programs were able to generate ligand conformations similar to crystallographically determined protein/ligand complex structures for at least one of the targets. However, scoring functions were less successful at distinguishing the crystallographic conformation from the set of docked poses. 
Docking programs identified active compounds from a pharmaceutically relevant pool of decoy compounds; however, no single program performed well for all of the targets. For prediction of compound affinity, none of the docking programs or scoring functions made a useful prediction of ligand binding affinity.",
}

@Article{Erickson04,
  author    = "J.A. Erickson and M. Jalaie and D.H. Robertson and R.A. Lewis and M. Vieth",
  title     = "Lessons in Molecular Recognition: The Effects of Ligand and Protein Flexibility on Molecular Docking Accuracy",
  journal   = "J. Med. Chem.",
  volume    = "47",
  number    = "1",
  year      = "2004",
  pages     = "45--55",
  abstract  = "The key to success for computational tools used in structure-based drug design is the ability to accurately place or dock a ligand in the binding pocket of the target of interest. In this report we examine the effect of several factors on docking accuracy, including ligand and protein flexibility. To examine ligand flexibility in an unbiased fashion, a test set of 41 ligand-protein cocomplex X-ray structures were assembled that represent a diversity of size, flexibility, and polarity with respect to the ligands. Four docking algorithms, DOCK, FlexX, GOLD, and CDOCKER, were applied to the test set, and the results were examined in terms of the ability to reproduce X-ray ligand positions within 2.0 {\AA} heavy atom root-mean-square deviation. Overall, each method performed well (>50\% accuracy) but for all methods it was found that docking accuracy decreased substantially for ligands with eight or more rotatable bonds. Only CDOCKER was able to accurately dock most of those ligands with eight or more rotatable bonds (71\% accuracy rate). A second test set of structures was gathered to examine how protein flexibility influences docking accuracy. CDOCKER was applied to X-ray structures of trypsin, thrombin, and HIV-1-protease, using protein structures bound to several ligands and also the unbound (apo) form. 
Docking experiments of each ligand to one average structure and to the apo form were carried out, and the results were compared to docking each ligand back to its originating structure. The results show that docking accuracy falls off dramatically if one uses an average or apo structure. In fact, it is shown that the drop in docking accuracy mirrors the degree to which the protein moves upon ligand binding.",
}

@Unpublished{Zavodszky05,
  author    = "M.I. Zavodszky and L. Kuhn",
  title     = "Lessons from Docking Validation",
  note      = "Submitted for publication",
  year      = "2005",
  url       = "http://www.bch.msu.edu/labs/kuhn/web/publication_papers/pdf/Zavodszky_Kuhn_JMedChem_2005.pdf",
}

@Article{Bursulaya03,
  author    = "B.D. Bursulaya and M. Totrov and R. Abagyan and C.L. {Brooks, III}",
  title     = "Comparative study of several algorithms for flexible ligand docking",
  journal   = "J. Comput. Aided Mol. Des.",
  volume    = "17",
  year      = "2003",
  pages     = "755--763",
}

@Article{Nissink02,
  author    = "J.W.M. Nissink and C. Murray and M. Hartshorn and M.L. Verdonk and J.C. Cole and R. Taylor",
  title     = "A new test set for validating predictions of protein-ligand interaction",
  journal   = "Proteins",
  volume    = "49",
  number    = "4",
  year      = "2002",
  pages     = "457--471",
}

@Article{Bissantz00,
  author    = "C. Bissantz and G. Folkers and D. Rognan",
  title     = "Protein-based virtual screening of chemical databases. 1. Evaluation of different docking/scoring combinations",
  journal   = "J. Med. Chem.",
  volume    = "43",
  year      = "2000",
  pages     = "4759--4767",
  abstract  = "Three different database docking programs (Dock, FlexX, Gold) have been used in combination with seven scoring functions (Chemscore, Dock, FlexX, Fresno, Gold, Pmf, Score) to assess the accuracy of virtual screening methods against two protein targets (thymidine kinase, estrogen receptor) of known three-dimensional structure. For both targets, it was generally possible to discriminate about 7 out of 10 true hits from a random database of 990 ligands. 
The use of consensus lists common to two or three scoring functions clearly enhances hit rates among the top 5\% scorers from 10\% (single scoring) to 25-40\% (double scoring) and up to 65-70\% (triple scoring). However, in all tested cases, no clear relationships could be found between docking and ranking accuracies. Moreover, predicting the absolute binding free energy of true hits was not possible whatever docking accuracy was achieved and scoring function used. As the best docking/consensus scoring combination varies with the selected target and the physicochemistry of target-ligand interactions, we propose a two-step protocol for screening large databases: (i) screening of a reduced dataset containing a few known ligands for deriving the optimal docking/consensus scoring scheme, (ii) applying the latter parameters to the screening of the entire database.",
}

@Article{Perez01,
  author    = "C. Perez and A.R. Ortiz",
  title     = "Evaluation of docking functions for protein-ligand docking",
  journal   = "J. Med. Chem.",
  volume    = "44",
  year      = "2001",
  pages     = "3768--3785",
}

@Article{Vieth98a,
  author    = "M. Vieth and J. Hirst and B.N. Dominy and H. Daigler and C.L. {Brooks, III}",
  title     = "Assessing search strategies for flexible docking",
  journal   = "J. Comput. Chem.",
  volume    = "19",
  year      = "1998",
  pages     = "1623--1631",
}

@Article{Ha00,
  author    = "S. Ha and R. Andreani and A. Robbins and I. Muegge",
  title     = "Evaluation of docking/scoring approaches: A comparative study based on {MMP3} inhibitors",
  journal   = "Journal of Computer-Aided Molecular Design",
  volume    = "14",
  number    = "5",
  year      = "2000",
  pages     = "435--448",
  abstract  = "An increasing number of docking/scoring programs are available that use different sampling and scoring algorithms. A reliable scoring function is the crucial element of such approaches. Comparative studies are needed to evaluate their current capabilities. 
DOCK4 with force field and PMF scoring as well as FlexX were used to evaluate the predictive power of these docking/scoring approaches to identify the correct binding mode of 61 MMP-3 inhibitors in a crystal structure of stromelysin and also to rank them according to their different binding affinities. It was found that DOCK4/PMF scoring performs significantly better than FlexX and DOCK4/FF in both ranking ligands and predicting their binding modes. Most notably, DOCK4/PMF was the only scoring/docking approach that found a significant correlation between binding affinity and predicted score of the docked inhibitors. However, comparing only those cases where the correct binding mode was identified (scoring highest among sampled poses), FlexX showed the best `fine tuning' (lowest rmsd) in predicted binding modes. The results suggest that not so much the sampling procedure but rather the scoring function is the crucial element of a docking program.",
}

@Article{Schulz-Gasch03,
  author    = "T. Schulz-Gasch and M. Stahl",
  title     = "Binding site characteristics in structure-based virtual screening: evaluation of current docking tools",
  journal   = "Journal of Molecular Modeling",
  volume    = "9",
  number    = "1",
  year      = "2003",
  pages     = "47--57",
  abstract  = "Two new docking programs FRED (OpenEye Scientific Software) and Glide (Schr{\"o}dinger, Inc.) in combination with various scoring functions implemented in these programs have been evaluated against a variety of seven protein targets (cyclooxygenase-2, estrogen receptor, p38 MAP kinase, gyrase B, thrombin, gelatinase A, neuraminidase) in order to assess their accuracy in virtual screening. Sets of known inhibitors were added to and ranked relative to a random library of drug-like compounds. Performance was compared in terms of enrichment factors and CPU time consumption. Results and specific features of the two new tools are discussed and compared to previously published results using FlexX (Tripos, Inc.) as a docking engine. 
In addition, general criteria for the selection of docking algorithms and scoring functions based on binding-site characteristics of specific protein targets are proposed.",
}

@Article{Merlitz02,
  author    = "H. Merlitz and W. Wenzel",
  title     = "Comparison of stochastic optimization methods for receptor-ligand docking",
  journal   = "Chemical Physics Letters",
  volume    = "362",
  number    = "3",
  year      = "2002",
  pages     = "271--277",
  abstract  = "We compare the efficiency of three stochastic optimization methods, simulated annealing, parallel tempering and stochastic tunneling to locate the global minima of complex and rugged potential energy surfaces arising from atomistic models for receptor-ligand docking. The stochastic tunneling method proves to be the most efficient generic approach for atomistic receptor-ligand docking in the rigid ligand - rigid receptor approximation.",
}

@Article{McConkey02,
  author    = "B. McConkey and V. Sobolev and M. Edelman",
  title     = "The performance of current methods in ligand-protein docking",
  journal   = "Current Science",
  volume    = "83",
  number    = "7",
  year      = "2002",
  pages     = "845--856",
  abstract  = "Computer-based methods for predicting the structure of ligand-protein complexes or docking algorithms have application in both drug design and the elucidation of biochemical pathways. The number of solved structures of ligand-protein complexes now permits the testing and validation of docking algorithms, by comparison of predicted complexes with structures extracted from protein databases. This paper outlines the methodologies and compares their performance in predicting the structure of ligand-protein complexes.",
}

@Article{Cummings05,
  author    = "M.D. Cummings and R.L. DesJarlais and A.C. Gibbs and V. Mohan and E.P. Jaeger",
  title     = "Comparison of Automated Docking Programs as Virtual Screening Tools",
  journal   = "J. Med. 
Chem.", year = "2005", volume = "48", pages = "962-976", comment = "Related to data set provided by Joe Corkery", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-ligand scoring overviews %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Gohlke02, author = "H. Gohlke and G. Klebe", title = "Approaches to the description and prediction of the binding affinity of small-molecule ligands to macromolecular receptors", journal = "Angew. Chem., Int. Ed.", volume = "41", year = "2002", pages = "2644-2676", } @InCollection{Buhm02, author = "H.J. Boehm and M. Stahl", title = "The use of scoring functions in drug discovery applications", booktitle = "Reviews in Computational Chemistry", publisher = "Wiley-VCH", location = "New York", volume = "18", year = "2002", pages = "41-87", } @Article{Tame05, author = "J. Tame", title = "Scoring Functions -- the First 100 Years", journal = "Journal of Computer-Aided Molecular Design", volume = "19", number = "6", month = "June", year = "2005", pages = "445-451", abstract = "The use of simple linear mathematical models to estimate chemical properties is not a new idea. Albert Einstein used very simple ‘gravity-like’ forces to explain the capillarity of different liquids in 1900-1901. Today such models are used in more complicated situations, and a great many have been developed to analyse interactions between proteins and their ligands. This is not surprising, since proteins are too complicated to model accurately without lengthy numerical analysis, and simple models often do at least as good a job in predicting binding constants as much more computationally expensive methods. One hundred years after Einstein’s ‘miraculous year’ in which he transformed physics, it is instructive to recall some of his even earlier work. As approximations, ‘scoring functions’ are excellent, but it is dangerous to read too much into them. 
A few cautionary tales are presented for the beginner to the field of ligand affinity prediction by linear models.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-ligand scoring force field methods %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Brooks83, author = "Bernard R. Brooks and Robert E. Bruccoleri and Barry D. Olafson and David J. States and S. Swaminathan and Martin Karplus", title = "{CHARMM:} A program for macromolecular energy, minimization, and dynamics calculations", journal = "J. Comput. Chem.", volume = "4", number = "2", pages = "187-217", year = "1983", comment = "This is the main reference for CHARMM.", } @Article{Cornell95, author = "W.D. Cornell and P. Cieplak and C.I. Bayly and I.R. Gould and K.M. {Merz, Jr.} and D.M. Ferguson and D.C. Spellmeyer and T. Fox and J.W. Caldwell and P.A. Kollman", title = "A Second Generation Force Field for the Simulation of Proteins, Nucleic Acids, and Organic Molecules", journal = "Journal of the American Chemical Society", volume = "117", number = "19", year = "1995", pages = "5179-5197", comment = "This is the main reference for AMBER(?)", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-ligand scoring with empirical methods %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Eldridge97, author = "M.D. Eldridge and C.W. Murray and T.R. Auton and G.V. Paolini and R.P. Mee", title = "Empirical scoring functions. {I:} The development of a fast empirical scoring function to estimate the binding affinity of ligands in receptor complexes", journal = "J. Comput.-Aided Mol. Des.", volume = "11", year = "1997", pages = "425-445", comment = "This is the main reference for ChemScore.", } @Article{Boehm94, author = "H.J. 
Boehm", title = "The development of a simple empirical scoring function to estimate the binding constant for a protein-ligand complex of known three-dimensional structure", journal = "J. Comput.-Aided Mol. Des.", volume = "8", year = "1994", pages = "243-256", } @Article{Buhm98, author = "H.J. Boehm", title = "Prediction of binding constants of protein ligands: A fast method for the prioritization of hits obtained from de novo design or {3D} database search programs", journal = "J. Comput.-Aided Mol. Des.", volume = "12", year = "1998", pages = "309-323", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-ligand scoring with knowledge-based methods %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Gohlke00, author = "H. Gohlke and M. Hendlich and G. Klebe", title = "Knowledge-based scoring function to predict protein-ligand interactions", journal = "J. Mol. Biol.", volume = "295", year = "2000", pages = "337-356", comment = "This is the main reference for DrugScore, a knowledge-based scoring method.", } @Article{Mitchell99a, author = "J.B.O. Mitchell and R. Laskowski and A. Alex and J.M. Thornton", title = "{BLEEP} - potential of mean force describing protein-ligand interactions: I. Generating potential", journal = "J. Comput. Chem.", volume = "20", number = "11", year = "1999", pages = "1165-1176", comment = "This is the main reference for BLEEP.", abstract = "We have developed BLEEP (biomolecular ligand energy evaluation protocol), an atomic level potential of mean force (PMF) describing protein-ligand interactions. The pair potentials for BLEEP have been derived from high-resolution X-ray structures of protein-ligand complexes in the Brookhaven Protein Data Bank (PDB), with a careful treatment of homology. 
The use of a broad variety of protein-ligand structures in the derivation phase gives BLEEP more general applicability than previous potentials, which have been based on limited classes of complexes, and thus represents a significant step forward. We calculate the distance distributions in protein-ligand interactions for all 820 possible pairs that can be chosen from our set of 40 different atom types, including polar hydrogen. We then use a reverse Boltzmann methodology to convert these into energy-like pair potential functions. Two versions of BLEEP are calculated, one including and one excluding interactions between protein and water. The pair potentials are found to have the expected forms; polar and hydrogen bonding interactions show minima at short range, around 3.0 Å, whereas a typical hydrophobic interaction is repulsive at this distance, with values above 4.0 Å being preferred.", } @Article{Mitchell99b, author = "J.B.O. Mitchell and R. Laskowski and A. Alex and J.M. Thornton", title = "{BLEEP} - potential of mean force describing protein-ligand interactions: II. Calculation of binding energies and comparison with experimental data", journal = "J. Comput. Chem.", volume = "20", number = "11", year = "1999", pages = "1177-1185", } @Article{Nobeli01, author = "I. Nobeli and J.B.O. Mitchell and A. Alex and J.M. Thornton", title = "Evaluation of a Knowledge-Based Potential of Mean Force for Scoring Docked Protein-Ligand Complexes", journal = "Journal of Computational Chemistry", volume = "22", number = "7", year = "2001", pages = "673-688", } @Article{Muegge99, author = "I. Muegge and Y.C. Martin", title = "A general and fast scoring function for protein-ligand interactions: A simplified potential approach", journal = "J. Med. Chem.", volume = "42", year = "1999", pages = "791-804", } @Article{Tanaka76, author = "S. Tanaka and H.A. 
Scheraga", title = "Medium- and long-range interaction parameters between amino acids for predicting three-dimensional structures of proteins", journal = "Macromolecules", volume = "9", year = "1976", pages = "945-950", comment = "Early paper on data-driven scoring", abstract = "In a previous paper, a hypothesis for protein folding was proposed in which the native structure is formed by a three-step mechanism: (A) formation of ordered backbone structures by short-range interactions, (B) formation of small contact regions by medium-range interactions, and (C) association of the small contact regions into the native structure by long-range interactions. In this paper the empirical interaction parameters, used as a measure of the medium- and long-range interactions (the standard free energy, $\Delta G^{\circ}_{k,l}$, of formation of a contact between amino acids of species k and l) that include the role of the solvent (water) and determine the conformation of a protein in steps B and C, are evaluated from the frequency of contacts in the x-ray structures of native proteins. The numerical values of $\Delta G^{\circ}_{k,l}$ for all possible pairs of the 20 naturally occurring amino acids are presented. Contacts between highly nonpolar side chains of amino acids such as Ile, Phe, Trp, and Leu are shown quantitatively to be stable. On the contrary, contacts involving polar side chains of amino acids such as Ser, Asp, Lys, and Glu are significantly less stable. While this implies, in a quantitative manner, that it is generally more favorable for nonpolar groups to lie in the interior of the protein molecule and for the polar side chains to be exposed to the solvent (water) rather than to form contacts with other amino acids, many exceptions to this generalization are observed.", } @Article{Ge05, author = "W. Ge and B. Schneider and W.K. 
Olson", title = "Knowledge-Based Elastic Potentials for Docking Drugs or Proteins with Nucleic Acids", journal = "Biophysical Journal", volume = "88", year = "2005", pages = "1166-1190", abstract = "Elastic ellipsoidal functions defined by the observed hydration patterns around the DNA bases provide a new basis for measuring the recognition of ligands in the grooves of double-helical structures. Here a set of knowledge-based potentials suitable for quantitative description of such behavior is extracted from the observed positions of water molecules and amino acid atoms that form hydrogen bonds with the nitrogenous bases in high resolution crystal structures. Energies based on the displacement of hydrogen-bonding sites on drugs in DNA-crystal complexes relative to the preferred locations of water binding around the heterocyclic bases are low, pointing to the reliability of the potentials and the apparent displacement of water molecules by drug atoms in these structures. The validity of the energy functions has been further examined in a series of sequence substitution studies based on the structures of DNA bound to polyamides that have been designed to recognize the minor-groove edges of Watson-Crick basepairs. The higher energies of binding to incorrect sequences superimposed (without conformational adjustment or displacement of polyamide ligands) on observed high resolution structures confirm the hypothesis that the drug subunits associate with specific DNA bases. The knowledge-based functions also account satisfactorily for the measured free energies of DNA-polyamide association in solution and the observed sites of polyamide binding on nucleosomal DNA. The computations are generally consistent with mechanisms by which minor-groove binding ligands are thought to recognize DNA basepairs. 
The calculations suggest that the asymmetric distributions of hydrogen-bond-forming atoms on the minor-groove edge of the basepairs may underlie ligand discrimination of G·C from C·G pairs, in addition to the commonly believed role of steric hindrance. The analysis of polyamide-bound nucleosomal structures reveals other discrepancies in the expected chemical design, including unexpected contacts to DNA and modified basepair targets of some ligands. The ellipsoidal potentials thus appear promising as a mathematical tool for the study of drug- and protein-DNA interactions and for gaining new insights into DNA-binding mechanisms.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-ligand scoring by consensus %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Charifson99, author = "P.S. Charifson and J.J. Corkery and M.A. Murcko and W.P. Walters", title = "Consensus scoring: A method for obtaining improved hit rates from docking databases of three-dimensional structures into proteins", journal = "J. Med. Chem.", volume = "42", year = "1999", pages = "5100-5109", } @Article{Clark02, author = "R.D. Clark and A. Strizhev and J.M. Leonard and J.F. Blake and J.B. Matthew", title = "Consensus scoring for ligand/protein interactions", journal = "Journal of Molecular Graphics and Modelling", volume = "20", number = "4", year = "2002", pages = "281-295", } @Article{Paul02, author = "N. Paul and D. Rognan", title = "ConsDock: A new program for the consensus analysis of protein-ligand interactions", journal = "Proteins", volume = "47", year = "2002", pages = "521-533", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-ligand scoring evaluations %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Marsden04, author = "P.M. Marsden and D. Puvanendrampillai and J.B.O. Mitchell and R.C. 
Glen", title = "Predicting protein ligand binding affinities: a low scoring game?", journal = "Organic \& Biomolecular Chemistry", volume = "2", year = "2004", pages = "3267-3273", comment = "Compares binding affinities predicted by several scoring functions to measured values and finds poor correlations.", } @Article{Ferrara04, author = "P. Ferrara and H. Gohlke and D.J. Price and G. Klebe and C.L. {Brooks, III}", title = "Assessing Scoring Functions for Protein-Ligand Interactions", journal = "J. Med. Chem.", volume = "47", year = "2004", pages = "3032-3047", } @Article{Xing04, author = "L. Xing and E. Hodgkin and Q. Liu and D. Sedlock", title = "Evaluation and application of multiple scoring functions for a virtual screening experiment", journal = "J. Comput.-Aided Mol. Des.", volume = "18", year = "2004", pages = "333-344", } @Article{Wang03, author = "R. Wang and Y. Lu and S. Wang", title = "Comparative evaluation of 11 scoring functions for molecular docking", journal = "J. Med. Chem.", volume = "46", year = "2003", pages = "2287-2303", } @Article{Wei02, author = "B.Q. Wei and W.A. Baase and L.H. Weaver and B.W. Matthews and B.K. Shoichet", title = "A model binding site for testing scoring functions in molecular docking", journal = "J. Mol. Biol.", volume = "322", year = "2002", pages = "339-355", } @Article{Stahl01, author = "M. Stahl and M. Rarey", title = "Detailed analysis of scoring functions for virtual screening", journal = "J. Med. Chem.", volume = "44", year = "2001", pages = "1035-1042", } @Article{Vieth98b, author = "M. Vieth and J. Hirst and A. Kolinski and C.L. {Brooks, III}", title = "Assessing energy functions for flexible docking", journal = "J. Comput. Chem.", volume = "19", year = "1998", pages = "1612-1622", } @Article{Sotriffer02b, author = "C.A. Sotriffer and H. Gohlke and G. Klebe", title = "Docking into knowledge-based potential fields: A comparative evaluation of {DrugScore}", journal = "J. Med. 
Chem.", volume = "45", year = "2002", pages = "1967-1970", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-protein binding site analysis %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Chakrabarti02, author = "P. Chakrabarti and J. Janin", title = "Dissecting protein-protein recognition sites", journal = "Proteins: Structure, Function, and Genetics", volume = "47", number = "3", year = "2002", pages = "334-343", abstract = "The recognition sites in 70 pairwise protein-protein complexes of known three-dimensional structure are dissected in a set of surface patches by clustering atoms at the interface. When the interface buries <2000 Angstroms^2 of protein surface, the recognition sites usually form a single patch on the surface of each component protein. In contrast, larger interfaces are generally multipatch, with at least one pair of patches that are equivalent in size to a single-patch interface. Each recognition site, or patch within a site, contains a core made of buried interface atoms, surrounded by a rim of atoms that remain accessible to solvent in the complex. A simple geometric model reproduces the number and distribution of atoms within a patch. The rim is similar in composition to the rest of the protein surface, but the core has a distinctive amino acid composition, which may help in identifying potential protein recognition sites on single proteins of known structures.", } @Article{Jones00, author = "S. Jones and A. Marin and J.M. Thornton", title = "Protein domain interfaces: characterization and comparison with oligomeric protein interfaces", journal = "Protein Engineering", volume = "13", number = "2", pages = "77-82", year = "2000", abstract = "The physical and chemical properties of domain-domain interactions have been analysed in two-domain proteins selected from the protein classification, CATH. 
The two-domain structures were divided into those derived from (i) monomeric proteins, or (ii) oligomeric or complexed proteins. The size, polarity, hydrogen bonding and packing of the intra-chain domain interface were calculated for both sets of two-domain structures. The results were compared with inter-chain interface parameters from permanent and non-obligate protein-protein complexes. In general, the intra-chain domain and inter-chain interfaces were remarkably similar. Many of the intra-chain interface properties are intermediate between those calculated for permanent and non-obligate inter-chain complexes. Residue interface propensities were also found to be very similar, with hydrophobic residues playing a major role, together with positively charged arginine residues. In addition, the residue composition of the domain interfaces were found to be more comparable with domain surfaces than domain cores. The implications of these results for domain swapping and protein folding are discussed.", } @Article{Bogan98, author = "A.A. Bogan and K.S. Thorn", title = "Anatomy of hot spots in protein interfaces", journal = "J. Mol. Biol.", volume = "280", year = "1998", pages = "1-9", } @Article{DeLano02, author = "W.L. DeLano", title = "Unraveling hot spots in binding interfaces: Progress and challenges", journal = "Curr. Opin. Struct. Biol.", volume = "12", year = "2002", pages = "14-20", } @Article{Hu00, author = "Z. Hu and B. Ma and H. Wolfson and R. Nussinov", title = "Conservation of polar residues as hot spots at protein-protein interfaces", journal = "Proteins", volume = "39", year = "2000", pages = "331-342", } @Article{Jones97, author = "S. Jones and J.M. Thornton", title = "Analysis of protein-protein interaction sites using surface patches", journal = "Journal of Molecular Biology", volume = "272", number = "1", month = "September", year = "1997", pages = "121-132", } @Article{Jones96, author = "S. Jones and J.M. 
Thornton", title = "Principles of protein-protein interactions", journal = "PNAS", volume = "93", number = "1", year = "1996", pages = "13-20", abstract = "This review examines protein complexes in the Brookhaven Protein Databank to gain a better understanding of the principles governing the interactions involved in protein-protein recognition. The factors that influence the formation of protein-protein complexes are explored in four different types of protein-protein complexes---homodimeric proteins, heterodimeric proteins, enzyme-inhibitor complexes, and antibody-protein complexes. The comparison between the complexes highlights differences that reflect their biological roles.", } @Article{LoConte98, author = "L. {Lo Conte} and C. Chothia and J. Janin", title = "The atomic structure of protein-protein recognition sites", journal = "J Mol Biol", volume = "285", year = "1998", pages = "2177-2198", } @Article{Norel99, author = "R. Norel and D. Petrey and H.J. Wolfson and R. Nussinov", title = "Examination of shape complementarity in docking of unbound proteins", journal = "Proteins", volume = "36", year = "1999", pages = "307-317", } @Article{Larsen98, author = "T.A. Larsen and A.J. Olson and D.S. Goodsell", title = "Morphology of protein-protein interfaces", journal = "Structure", volume = "6", number = "4", year = "1998", pages = "421-427", abstract = "BACKGROUND: Most soluble proteins are active as low-number oligomers. Statistical surveys of oligomeric proteins have defined the roles of hydrophobicity and complementarity in the stability of protein interfaces, but tend to average structural features over a diverse set of protein-protein interfaces, blurring information on how individual interfaces are stabilized. RESULTS: We report a visual survey of 136 homodimeric proteins from the Brookhaven Protein Data Bank, with images that highlight the major structural features of each protein-protein interaction surface. 
Nearly all of these proteins have interfaces formed between two globular subunits. Surprisingly, the pattern of hydrophilicity over the surface of these interfaces is quite variable. Approximately one-third of the interfaces show a recognizable hydrophobic core, with a single large, contiguous, hydrophobic patch surrounded by a ring of intersubunit polar interactions. The remaining two-thirds of the proteins show a varied mixture of small hydrophobic patches, polar interactions and water molecules scattered over the entire interfacial area. Ten proteins in the survey have intertwined interfaces formed by extensive interdigitation of the two subunit chains. These interfaces are very hydrophobic and are associated with proteins that require both stability and internal symmetry. CONCLUSIONS: The archetypal protein interface, with a defined hydrophobic core, is present in only a minority of the surveyed homodimeric proteins. Most homodimeric proteins are stabilized by a combination of small hydrophobic patches, polar interactions and a considerable number of bridging water molecules. The presence or absence of a hydrophobic core within these interfaces does not correlate with specific protein functions.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-protein binding site prediction %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Neuvirth04, author = "H. Neuvirth and R. Raz and G. Schreiber", title = "{ProMate:} A structure based prediction program to identify the location of protein-protein binding sites", journal = "J Mol Biol", volume = "338", year = "2004", pages = "181-199", } @article{Espadaler05, author = "J. Espadaler and O. {Romero-Isart} and R.M. Jackson and B. 
Oliva", title = "Prediction of protein-protein interactions using distant conservation of sequence patterns and structure relationships", journal = "Bioinformatics", volume = "21", year = "2005", pages = "3360-3368", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-protein docking overviews %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Szilagyi05, author = "A. Szilagyi and V. Grimm and A.K. Arakaki and J. Skolnick", title = "Prediction of physical protein-protein interactions", journal = "Phys. Biol.", volume = "2", year = "2005", pages = "S1-S16", abstract = "Many essential cellular processes such as signal transduction, transport, cellular motion and most regulatory mechanisms are mediated by protein-protein interactions. In recent years, new experimental techniques have been developed to discover the protein-protein interaction networks of several organisms. However, the accuracy and coverage of these techniques have proven to be limited, and computational approaches remain essential both to assist in the design and validation of experimental studies and for the prediction of interaction partners and detailed structures of protein complexes. Here, we provide a critical overview of existing structure-independent and structure-based computational methods. Although these techniques have significantly advanced in the past few years, we find that most of them are still in their infancy. We also provide an overview of experimental techniques for the detection of protein-protein interactions. Although the developments are promising, false positive and false negative results are common, and reliable detection is possible only by taking a consensus of different experimental approaches. The shortcomings of experimental techniques affect both the further development and the fair evaluation of computational prediction methods. 
For an adequate comparative evaluation of prediction and high-throughput experimental methods, an appropriately large benchmark set of biophysically characterized protein complexes would be needed, but is sorely lacking.", } @Article{Salwinski03, author = "L. Salwinski and D. Eisenberg", title = "Computational methods of analysis of protein-protein interactions", journal = "Curr Opin Struct Biol", volume = "13", year = "2003", pages = "377-382", } @Article{Valencia02, author = "A. Valencia and F. Pazos", title = "Computational methods for the prediction of protein interactions", journal = "Curr Opin Struct Biol", volume = "12", year = "2002", pages = "368-373", } @Article{Smith02, author = "G.R. Smith and M.J.E. Sternberg", title = "Prediction of protein-protein interactions by docking methods", journal = "Current Opinion in Structural Biology", volume = "12", year = "2002", pages = "28-35", abstract = "Recently, developments have been made in predicting the structure of docked complexes when the coordinates of the components are known. The process generally consists of a stage during which the components are combined rigidly and then a refinement stage. Several rapid new algorithms have been introduced in the rigid docking problem and promising refinement techniques have been developed, based on modified molecular mechanics force fields and empirical measures of desolvation, combined with minimisations that switch on the short-range interactions gradually. There has also been progress in developing a benchmark set of targets for docking and a blind trial, similar to the trials of protein structure prediction, has taken place.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-protein docking methods %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Schueler-Furman05, author = "O. Schueler-Furman and C. Wang and D. 
Baker", title = "Progress in protein-protein docking: Atomic resolution predictions in the {CAPRI} experiment using {RosettaDock} with an improved treatment of side-chain flexibility", journal = "Proteins: Structure, Function, and Bioinformatics", volume = "60", number = "2", year = "2005", pages = "187-194", abstract = "RosettaDock uses real-space Monte Carlo minimization (MCM) on both rigid-body and side-chain degrees of freedom to identify the lowest free energy docked arrangement of 2 protein structures. An improved version of the method that uses gradient-based minimization for off-rotamer side-chain optimization and includes information from unbound structures was used to create predictions for Rounds 4 and 5 of CAPRI. First, large numbers of independent MCM trajectories were carried out and the lowest free energy docked configurations identified. Second, new trajectories were started from these lowest energy structures to thoroughly sample the surrounding conformation space, and the lowest energy configurations were submitted as predictions. For all cases in which there were no significant backbone conformational changes, a small number of very low-energy configurations were identified in the first, global search and subsequently found to be close to the center of the basin of attraction in the free energy landscape in the second, local search. Following the release of the experimental coordinates, it was found that the centers of these free energy minima were remarkably close to the native structures in not only the rigid-body orientation but also the detailed conformations of the side-chains. Out of 8 targets, the lowest energy models had interface root-mean-square deviations (RMSDs) less than 1.1 Å from the correct structures for 6 targets, and interface RMSDs less than 0.4 Angstroms for 3 targets. The predictions were top submissions to CAPRI for Targets 11, 12, 14, 15, and 19. 
The close correspondence of the lowest free energy structures found in our searches to the experimental structures suggests that our free energy function is a reasonable representation of the physical chemistry, and that the real space search with full side-chain flexibility to some extent solves the protein-protein docking problem in the absence of significant backbone conformational changes. On the other hand, the approach fails when there are significant backbone conformational changes as the steric complementarity of the 2 proteins cannot be modeled without incorporating backbone flexibility, and this is the major goal of our current work.", } @InProceedings{Choi04, author = "V. Choi and N. Goyal", title = "A Combinatorial Shape Matching Algorithm for Rigid Protein Docking", booktitle = "The Fifteenth Annual Symposium on Combinatorial Pattern Matching ({CPM} 2004)", volume = "{LNCS} 3109", year = "2004", pages = "285-296", } @Article{Meyer96, author = "M. Meyer and P. Wilson and D. Schomburg", title = "Hydrogen bonding and molecular surface shape complementarity as a basis for protein docking", journal = "J. Mol. Biol.", volume = "264", year = "1996", pages = "199-210", } @Article{Sobolev96, author = "V. Sobolev and R.C. Wade and G. Vriend and M. Edelman", title = "Molecular docking using surface complementarity", journal = "Proteins Struct. Func. Genet", volume = "25", year = "1996", pages = "120-129", } @Article{Helmer94, author = "M. Helmer-Citterich and A. Tramontano", title = "Puzzle: a new method for automated protein docking based on surface shape complementarity", journal = "J. Mol. Biol.", volume = "235", year = "1994", pages = "1021-1031", } @Article{Norel95, author = "R. Norel and S.L. Lin and H.J. Wolfson and R. Nussinov", title = "Molecular surface complementarity at protein-protein interfaces: the critical role played by surface normals at well placed, sparse, points in docking", journal = "J. Mol. 
Biol.", volume = "252", year = "1995", pages = "263-273", } @Article{Young94, author = "L. Young and R.L. Jernigan and D.G. Covell", title = "A role for surface hydrophobicity in protein-protein recognition", journal = "Protein Sci", year = "1994", month = "May", volume = "3", number = "5", pages = "717-29", } @Article{Gabb97, author = "H. Gabb and R. Jackson and M. Sternberg", title = "Modelling protein docking using shape complementarity, electrostatics, and biochemical information", journal = "J. Mol. Biol.", volume = "272", year = "1997", pages = "106-120", comment = "``A protein docking study was performed for two classes of biomolecular complexes: six enzyme/inhibitor and four antibody/antigen. Biomolecular complexes for which crystal structures of both the complexed and uncomplexed proteins are available were used for eight of the ten test systems. Our docking experiments consist of a global search of translational and rotational space followed by refinement of the best predictions. Potential complexes are scored on the basis of shape complementarity and favourable electrostatic interactions using Fourier correlation theory. Since proteins undergo conformational changes upon binding, the scoring function must be sufficiently soft to dock unbound structures successfully. Some degree of surface overlap is tolerated to account for side-chain flexibility. Similarly for electrostatics, the interaction of the dispersed point charges of one protein with the Coulombic field of the other is measured rather than precise atomic interactions. We tested our docking protocol using the native rather than the complexed forms of the proteins to address the more scientifically interesting problem of predictive docking. In all but one of our test cases, correctly docked geometries (interface Calpha RMS deviation 100 000 compounds) containing multiple (11) activity classes. 
Structure-unaware atom count vectors as descriptors in combination with the Euclidean distance measure are able to achieve enrichment factors over random selection of around 4 (depending on the particular class of active compounds), putting the enrichment factors reported for more sophisticated virtual screening methods in a different light. They are also able to retrieve active compounds with novel scaffolds instead of merely the expected structural analogues. The added value of many currently used virtual screening methods (calculated as enrichment factors) drops down to a factor of between 1 and 2, instead of often reported double-digit figures. The observed effect is much less profound for simple descriptors such as molecular weight and is only present in cases of atypical (larger) ligands. The current state of virtual screening is not as sophisticated as might be expected, which is due to descriptors still not being able to capture structural properties relevant to binding. This fact can partly be explained by highly nonlinear structure-activity relationships, which represent a severe limitation of the similar property principle in the context of bioactivity.", } @Article{Shoichet02, author = "B.K. Shoichet and S.L. McGovern and B. Wei and J.J. Irwin", title = "Lead discovery using molecular docking", journal = "Curr Opin Chem Biol", volume = "6", year = "2002", pages = "439-446", } @Article{Lyne02, author = "P.D. Lyne", title = "Structure-based virtual screening: an overview", journal = "DDT", volume = "7", number = "20", month = "October", year = "2002", pages = "1047-1055", abstract = "Enormous advances in genomics have resulted in a large increase in the number of potential therapeutic targets that are available for investigation. This growth in potential targets has increased the demand for reliable target validation, as well as technologies that can identify rapidly several quality lead candidates. 
Virtual screening, and in particular receptor-based virtual screening, has emerged as a reliable, inexpensive method for identifying leads. Although still an evolving method, advances in computational techniques have enabled virtual screening to have a positive impact on the discovery process. Here, the current strengths and weaknesses of the technology are discussed, and emphasis is placed on aspects of the work-flow of a virtual screening campaign, from preparation through to post-screening analysis.",
}

@Article{Abagyan01,
  author   = "R. Abagyan and M. Totrov",
  title    = "High-throughput docking for lead generation",
  journal  = "Curr Opin Chem Biol",
  volume   = "5",
  year     = "2001",
  pages    = "375--382",
  abstract = "Recent improvements in flexible docking technology may lead to a bigger role for computational methods in lead discovery. Although fast and accurate computational prediction of binding affinities for an arbitrary molecule is still beyond the limits of current methods, the docking and screening procedures can select small sets of likely lead candidates from large libraries of either commercially or synthetically available compounds.",
}

% Walters98: a space is required between the initials and the surname,
% otherwise the whole token is parsed as the last name.
@Article{Walters98,
  author  = "W.P. Walters and M.T. Stahl and M.A. Murcko",
  title   = "Virtual screening - an overview",
  journal = "Drug Discov Today",
  volume  = "3",
  year    = "1998",
  pages   = "160--178",
}

@Article{Shirai01,
  author  = "H. Shirai and J. Shi and T.L. Blundell and K. Mizuguchi",
  title   = "Structural bioinformatics as an approach to genomics-based drug discovery",
  journal = "Global Outsourcing Review",
  volume  = "3",
  year    = "2001",
  pages   = "48--53",
}

% Bajorath02: stray space removed from the host name in the note.
@Misc{Bajorath02,
  author  = "J. Bajorath",
  title   = "Virtual screening in drug discovery: methods, expectations and reality",
  note    = "www.currentdrugdiscovery.com",
  month   = "March",
  year    = "2002",
  comment = "Provides overview of virtual screening, points to successes",
}

@Article{Barril04,
  title  = "Virtual Screening in Structure-Based Drug Discovery",
  author = "X. Barril and R.E.
Hubbard and S.D. Morley", journal = "Mini Reviews in Medicinal Chemistry", volume = "4", number = "7", month = "September", year = "2004", pages = "779-791", abstract = "Recent advances in structure determination and computational methods have encouraged the development of structure-based virtual screening. Here we survey progress in the field and review the most recent methods, validation experiments and real applications, including an in-house example of hit identification for the oncology target Hsp90. These results provide a basis for discussing the current state of structure-based virtual screening and to outline the developments that are expected to have a major impact in the near future.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Ligand-based drug screening methods %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Paul04, author = "N. Paul and E. Kellenberger and G. Bret and P. Muller and Didier Rognan", title = "Recovering the True Targets of Specific Ligands by Virtual Screening of the Protein Data Bank", journal = "PROTEINS: Structure, Function, and Bioinformatics", volume = "54", year = "2004", pages = "671-680", } @Article{Zauhar03, author = "R.J. Zauhar and G. Moyna and L. Tian and Z. Li and W.J. Welsh", title = "Shape Signatures: A New Approach to Computer-Aided Ligand- and Receptor-Based Drug Design", journal = "J. Med. Chem.", volume = "46", year = "2003", pages = "5674-5690", abstract = "A unifying principle of rational drug design is the use of either shape similarity or complementarity to identify compounds expected to be active against a given target. Shape similarity is the underlying foundation of ligand-based methods, which seek compounds with structure similar to known actives, while shape complementarity is the basis of most receptorbased design, where the goal is to identify compounds complementary in shape to a given receptor. 
These approaches can be extended to include molecular descriptors in addition to shape, such as lipophilicity or electrostatic potential. Here we introduce a new technique, which we call shape signatures, for describing the shape of ligand molecules and of receptor sites. The method uses a technique akin to ray-tracing to explore the volume enclosed by a ligand molecule, or the volume exterior to the active site of a protein. Probability distributions are derived from the ray-trace, and can be based solely on the geometry of the reflecting ray, or may include joint dependence on properties, such as the molecular electrostatic potential, computed over the surface. Our shape signatures are just these probability distributions, stored as histograms. They converge rapidly with the length of the ray-trace, are independent of molecular orientation, and can be compared quickly using simple metrics. Shape signatures can be used to test for both shape similarity between compounds and for shape complementarity between compounds and receptors and thus can be applied to problems in both ligand- and receptor-based molecular design. We present results for comparisons between small molecules of biological interest and the NCI Database using shape signatures under two different metrics. Our results show that the method can reliably extract compounds of shape (and polarity) similar to the query molecules. We also present initial results for a receptor-based strategy using shape signatures, with application to the design of new inhibitors predicted to be active against HIV protease.", } @Article{Chen01, author = "Y.Z. Chen and D.G. Zhi", title = "Ligand-Protein Inverse Docking and Its Potential Use in the Computer Search of Protein Targets of a Small Molecule", journal = "PROTEINS: Structure, Function, and Genetics", volume = "43", pages = "217-226", year = "2001", abstract = "Ligand-protein docking has been developed and used in facilitating newdrug discoveries. 
In this approach, docking single or multiple small molecules to a receptor site is attempted to find putative ligands. A number of studies have shown that docking algorithms are capable of finding ligands and binding conformations at a receptor site close to experimentally determined structures. These algorithms are expected to be equally applicable to the identification of multiple proteins to which a small molecule can bind or weakly bind. We introduce a ligand-protein inverse-docking approach for finding potential protein targets of a small molecule by the computer-automated docking search of a protein cavity database. This database is developed from protein structures in the Protein Data Bank (PDB). Docking is conducted with a procedure involving multiple-conformer shape-matching alignment of a molecule to a cavity followed by molecular-mechanics torsion optimization and energy minimization on both the molecule and the protein residues at the binding region. Scoring is conducted by the evaluation of molecular-mechanics energy and, when applicable, by the further analysis of binding competitiveness against other ligands that bind to the same receptor site in at least one PDB entry. Testing results on two therapeutic agents, 4H-tamoxifen and vitamin E, showed that 50\% of the computer-identified potential protein targets were implicated or confirmed by experiments. The application of this approach may facilitate the prediction of unknown and secondary therapeutic target proteins and those related to the side effects and toxicity of a drug or drug candidate.",
}

@Article{Labute05,
  author   = "P. Labute",
  title    = "On the perception of molecules from {3D} atomic coordinates",
  journal  = "J Chem Inf Model",
  volume   = "45",
  number   = "2",
  year     = "2005",
  pages    = "215--221",
  abstract = "A method is presented for perceiving chemical types of atoms in molecules given 3D atomic coordinates and element identities.
The method assigns hybridizations, bond orders, and formal charges for structures whether hydrogen atoms are present. The Maximum Weighted Matching algorithm for nonbipartite graphs is used to assign bond orders with weights derived from statistics of a large collection of organic molecules. Results form tests on a collection of functional groups, heterocycles, entries from the Protein Data Bank, and Cambridge Structural Database as well as a comparison to other methods, are presented and discussed.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structure-based drug design overviews %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Beavers02, author = "M.P. Beavers and X. Chen", title = "Structure-based combinatorial library design: methodologies and applications", journal = "Journal of Molecular Graphics and Modelling", volume = "20", year = "2002", pages = "463-468", abstract = "Rational design of small focused libraries that are biased toward specific therapeutic targets is currently at the forefront of combinatorial library design. Various structure-based design strategies can be implemented in focused library design when the 3D structure of the target is available through X-ray or NMR determination. This review discusses the major methods and programs specifically developed for the purpose of designing combinatorial libraries under the constraint of the binding site of a biological target, with emphasis on their advantages and disadvantages. Examples of the successful application of these methodologies are highlighted, demonstrating their performances within the practical drug discovery process.", } @Article{Veselovsky03, author = "A.V. Veselovsky and A.S. 
Ivanov", title = "Strategy of Computer-Aided Drug Design", journal = "Current Drug Targets - Infectious Disorders", volume = "3", number = "1", month = "March", year = "2003", pages = "33-40", abstract = "Modern strategies of computer-aided drug design (CADD) are reviewed. The task of CADD in the pipeline of drug discovery is accelerating of finding the new lead compounds and their structure optimization for the following pharmacological tests. The main directions in CADD are based on the availability of the experimentally determined three-dimensional structure of the target macromolecule. If spatial structure is known the methods of structure-based drug design are used. In the opposite case the indirect methods of CADD based on the structures of known ligands (ligand-based drug design) are used. The interrelationship between the main directions of CADD is reviewed. The main CADD approaches of molecule de novo design and database mining are described. They include methods of molecular docking, de novo design, design of pharmacophore and quantity structure-activity relationship models. New ways and perspectives of CADD are discussed.", } @Article{Klebe00, author = "G. Klebe", title = "Recent developments in structure-based drug design", journal = "J Mol Med", volume = "78", year = "2000", pages = "269-281", } @Article{Gane00, author = "P.J. Gane and P.M. Dean", title = "Recent advances in structure-based rational drug design", journal = "Curr Opin Struct Biol", volume = "10", year = "2000", pages = "401-404", } @Article{Ooms00, author = "F. Ooms", title = "Molecular Modeling and Computer Aided Drug Design. Examples of their Applications in Medicinal Chemistry", journal = "Current Medicinal Chemistry", volume = "7", year = "2000", pages = "141-158", abstract = "The development of new drugs with potential therapeutic applications is one of the most complex and difficult process in the pharmaceutical industry. 
Millions of dollars and man-hours are devoted to the discovery of new therapeutical agents. As, the activity of a drug is the result of a multitude of factors such as bioavailability, toxicity and metabolism, rational drug design has been utopias for centuries. Very recently, impressive technological advances in areas such as structural characterization of biomacromolecules, computer sciences and molecular biology have made rational drug design feasible. The aim of this review is to give an outline of studies in the field of medicinal chemistry in which molecular modeling has helped in the discovery process of new drugs. The emphasis will be on lead generation and optimization.", } @Article{Anderson02, author = "S. Anderson and J. Chiplin", title = "Structural genomics: shaping the future of drug design?", journal = "Drug Discov Today", volume = "7", year = "2002", pages = "105-107", } @Article{Marrone97, author = "T.J. Marrone and J.M. Briggs and J.A. McCammon", title = "Structure-based drug design: Computational Advances", journal = "Annu. Rev. Pharmacol. Toxicol.", volume = "37", year = "1997", pages = "71-90", } @Article{Bohacek97, author = "R.S. Bohacek and C. McMartin", title = "Modern computational chemistry and drug discovery: structure generating programs", journal = "Curr. Opin. Chem. Biol.", volume = "1", year = "1997", pages = "157-161", comment = "``During 1996 and 1997, the first reports were disclosed of active enzyme inhibitors based entirely on novel structures created by de novo methods. De novo methods have also been used to modify and significantly improve the binding affinity of an HIV protease inhibitor. Work continues in the improvement of methods for the de novo design of compounds which fit and chemically complement a binding site. De novo algorithms that generate only synthetically feasible structures have also been reported. 
In addition, methods are being developed for the automatic computer generation of virtual molecular libraries which can be searched to identify molecules to match a pharmacophore or fit into a binding site.''", } @InCollection{Charifson97, author = "P. Charifson and I.D. Kuntz", title = "Recent Successes and Continuing Limitations in Computer-Aided Drug Design", booktitle = "Practical Application of Computer-Aided Drug Design", publisher = "Marcel-Dekker", location = "New York", year = "1997", pages = "1-37", } @Article{Kuntz92, author = "D. Kuntz", title = "Structure-based Strategies for Drug Design and Discovery", journal = "Science", volume = "257", pages = "1078-1082", year = "1992", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structure-based drug design with fragment-based methods %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Buhm92, author = "H.J. Buhm", title = "The Computer Program {Ludi}: A New Method for the De Novo Design of Enzyme Inhibitors", journal = "J. Comp. Aided Molec. Design", volume = "6", year = "1992", pages = "61-78", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structure-based drug design with knowledge-based methods %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Grzybowski02, author = "B.A. Grzybowski and A.V. Ishchenko and J. Shimada and E.I Shakhnovich", title = "From Knowledge-Based Potentials to Combinatorial Lead Design in Silico", journal = "Acc. Chem. Res.", volume = "35", year = "2002", pages = "261-262", abstract = "Computational methods are becoming increasingly used in the drug discovery process. In this Account, we review a novel computational method for lead discovery. 
This method, called CombiSMoG for `combinatorial small molecule growth', is based on two components: a fast and accurate knowledge-based scoring function used to predict binding affinities of protein-ligand complexes, and a Monte Carlo combinatorial growth algorithm that generates large numbers of low-free-energy ligands in the binding site of a protein. We illustrate the advantages of the method by describing its application in the design of picomolar inhibitors for human carbonic anhydrase.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Structure-based drug design with ... %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Eisen94, author = "M.B. Eisen and D.C. Wiley and M. Karplus and R.E. Hubbard", title = "{HOOK:} A Program for finding novel molecular architectures that satisfy the chemical and steric requirements of a macromolecule binding site", journal = "Proteins", volume = "19", year = "1994", pages = "199-221", } @Article{vonItzstein93, author = "Mark {von Itzstein} and Wen-Yang Wu and Gaik B. Kok and Michael S. Pegg and Jeffrey C. Dyason and Betty Jin and Tho Van Phan and Mark L. Smythe and Hume F. White and Stuart W. Oliver and Peter M. Colman and Joseph N. Varghese and D. Michael Ryan and Jacqueline M. Woods and Richard C. Bethell and Vanessa J. Hotham and Janet M. Cameron and Charles R. Penn", title = "Rational design of potent sialidase-based inhibitors of influenza virus replication", journal = "Nature", volume = "363", pages = "418-423", month = "June", year = "1993", comment = "Uses GRID for drug design", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Quantitative structure activity relationship (QSAR) overviews %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Winkler01, author = "D.A. 
Winkler", title = "The role of quantitative structure-activity relationships (QSAR) in biomolecular discovery", journal = "Briefings in Bioinformatics", volume = "3", number = "1", year = "2002", pages = "73-86", abstract = "Empirical methods for building predictive models of the relationships between molecular structure and useful properties are becoming increasingly important. This has arisen because drug discovery and development have become more complex. A large amount of biological target information is becoming available through molecular biology. Automation of chemical synthesis and pharmacological screening has also provided a vast amount of experimental data. Tools for designing libraries and extracting information from molecular databases and high-throughput screening experiments robustly and quickly enable leads to be discovered more effectively. As drug leads progress down the development pipeline, the ability to predict physicochemical, pharmacokinetic and toxicological properties of these leads is becoming increasingly important in reducing the number of expensive, late development failures. Quantitative structure-activity relationship (QSAR) methods have much to offer in these areas. However, QSAR analysis has many traps for unwary practitioners. This review introduces the concepts behind QSAR, points out problems that may be encountered, suggests ways of avoiding the pitfalls and introduces several exciting, new QSAR methods discovered during the last decade.", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Protein-DNA binding %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Havranek04, author = "J.J. Havranek and C.M. Duarte and D. 
Baker",
  title   = "A simple physical model for the prediction and design of protein-{DNA} interactions",
  journal = "J Mol Biol",
  year    = "2004",
  volume  = "344",
  pages   = "59--70",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Molecular surfaces
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Connolly83,
  author  = "M.L. Connolly",
  title   = "Solvent-accessible surfaces of proteins and nucleic acids",
  journal = "Science",
  volume  = "221",
  pages   = "709--713",
  year    = "1983",
  comment = "This is the main reference for the Connolly surface",
}

@Article{Connolly83b,
  author  = "M. L. Connolly",
  title   = "Analytical Molecular Surface Calculation",
  journal = "Journal of Applied Crystallography",
  year    = "1983",
  volume  = "16",
  pages   = "548--558",
}

@Article{Connolly85,
  author  = "M.L. Connolly",
  title   = "Molecular surface triangulation",
  journal = "J. Appl. Crystallogr.",
  volume  = "18",
  year    = "1985",
  pages   = "499--505",
}

@Article{Connolly86a,
  author  = "M.L. Connolly",
  title   = "Measurement of protein surface shape by solid angles",
  journal = "J. Mol. Graphics",
  volume  = "4",
  year    = "1986",
  pages   = "3--6",
}

@Article{Connolly86b,
  author  = "M.L. Connolly",
  title   = "Plotting protein surfaces",
  journal = "J. Mol. Graphics",
  volume  = "4",
  year    = "1986",
  pages   = "93--96",
}

@Article{Connolly93,
  author  = "M.L. Connolly",
  title   = "The molecular surface package",
  journal = "J. Mol. Graphics",
  volume  = "11",
  year    = "1993",
  pages   = "139--141",
  comment = "http://www.biohedron.com/",
}

@Article{Sanner96,
  author  = "M.F. Sanner and J.C. Spehner and A.J. Olson",
  title   = "Reduced surface: an efficient way to compute molecular surfaces",
  journal = "Biopolymers",
  volume  = "38",
  number  = "3",
  year    = "1996",
  pages   = "305--320",
  comment = "http://www.scripps.edu/pub/olson-web/people/sanner/html/msms_home.html",
}

@Article{Eisenhaber93,
  author = "F. Eisenhaber and P.
Argos", title = "Improved Strategy in Analytic Surface Calculation for Molecular Systems: Handling of Singularities and Computational Efficiency", journal = "Journal of Computational Chemistry", volume = "14", number = "11", year = "1993", pages = "1272-1280", comment = "http://mendel.imp.univie.ac.at/SURFACE/ASC/asc2.html", } @Article{Eisenhaber95, author = "F. Eisenhaber and P. Lijnzaad and P. Argos and C. Sander and M. Scharf", title = "The Double Cubic Lattice Method: Efficient approaches to numerical integration of surface area and volume and to dot surface contouring of molecular assemblies", journal = "J. Comp. Chem.", volume = "16", number = "3", year = "1995", pages = "273-284", } @Article{Lee71, author = "B. Lee and F.M. Richards", title = "The Interpretation of Protein Structures: Estimation of Static Accessibility", journal = "Journal of Molecular Biology", volume = "55", year = "1971", pages = "379-400", comment = "This is the main reference for the solvent accessible surface", } @Article{Greer78, author = "J. Greer and B. Bush", title = "Macromolecular Shape and SurfaceMaps by Solvent Exclusion", journal = "Proceedings of the National Academy of Sciences USA", year = "1978", volume = "75", pages = "303-307", comment = "Early method for computing solvent accessible surfaces", } @Article{ODonnell92, author = "T.J. O'Donnell", title = "Interactive Computation and Display of Molecular Surfaces", journal = "Journal of Molecular Graphics", year = "1992", volume = "10", pages = "39-40", } @Article{Klein90, author = "T. Klein and C. Huang and E. Pettersen and G. Couch and T. Ferrin and R. Langridge", title = "A Real-Time Malleable Molecular Surface", journal = "Journal of Molecular Graphics", volume = "8", year = "1990", pages = "16-24 and 26-27", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Secondary Structure Prediction %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Kabsch83, author = "W. 
Kabsch and C. Sander",
  title   = "Dictionary of Protein Secondary Structure: Pattern Recognition of Hydrogen-Bonded and Geometrical Features",
  journal = "Biopolymers",
  volume  = "22",
  year    = "1983",
  pages   = "2577--2637",
  comment = "This is a paper about DSSP, a method for assigning secondary structure from atomic coordinates",
}

% Jones99: the field separator after "pages" is required; without it the
% entry fails to parse.
@Article{Jones99,
  author  = "D.T. Jones",
  title   = "Protein secondary structure prediction based on position-specific scoring matrices",
  journal = "J. Mol. Biol.",
  volume  = "292",
  year    = "1999",
  pages   = "195--202",
  comment = "PSIPRED: One of the leading protein secondary structure prediction methods",
}

@Article{King96,
  author  = "R.D. King and M.J.E. Sternberg",
  title   = "Identification and application of the concepts important for accurate and reliable protein secondary structure prediction",
  journal = "Prot. Sci.",
  volume  = "5",
  year    = "1996",
  pages   = "2298--2310",
}

@Article{Garnier96,
  author  = "J. Garnier and J.F. Gibrat and B. Robson",
  title   = "{GOR} method for predicting protein secondary structure from amino acid sequence",
  journal = "Methods Enzymol",
  volume  = "266",
  year    = "1996",
  pages   = "540--553",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tertiary structure prediction overviews
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Sternberg97: ISBN kept in the "isbn" field, as for the other books in this file.
@Book{Sternberg97,
  author    = "M.J.E. Sternberg",
  title     = "Protein Structure Prediction - A practical approach",
  publisher = "Oxford University Press",
  year      = "1997",
  isbn      = "0199634963",
  url       = "http://www.amazon.com/exec/obidos/tg/detail/-/0199634963/qid=1127051707/sr=1-1/ref=sr_1_1/104-9907891-5367125?v=glance&s=books",
}

@Article{Baker01,
  author  = "D. Baker and A. Sali",
  title   = "Protein Structure Prediction and Structural Genomics",
  journal = "Science",
  volume  = "294",
  number  = "5540",
  year    = "2001",
  pages   = "93--96",
}

@Article{Jones00,
  author = "D.T.
Jones",
  title   = "Protein Structure Prediction in the Postgenomic Era",
  journal = "Current Opinion in Structural Biology",
  volume  = "10",
  number  = "3",
  month   = "June",
  year    = "2000",
  pages   = "371--379",
}

% Simons01: names in an author list are separated by " and ";
% comma-separated lists are rejected by BibTeX ("too many commas").
@Article{Simons01,
  author  = "K.T. Simons and C. Strauss and D. Baker",
  title   = "Prospects for ab initio Protein Structural Genomics",
  journal = "J. Molecular Biology",
  volume  = "306",
  number  = "5",
  year    = "2001",
  pages   = "1191--1199",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tertiary structure prediction evaluations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Moult03: "year" is a required field for an article entry (CASP5 issue, 2003).
@Article{Moult03,
  author   = "J. Moult and K. Fidelis and A. Zemla and T. Hubbard",
  title    = "Critical assessment of methods of protein structure prediction {(CASP)} - round {V}",
  journal  = "Proteins: Structure, Function, and Genetics",
  volume   = "53",
  number   = "S6",
  year     = "2003",
  pages    = "334--339",
  abstract = "This article provides an introduction to the special issue of the journal Proteins dedicated to the fifth CASP experiment to assess the state of the art in protein structure prediction. The article describes the conduct, the categories of prediction, and the evaluation and assessment procedures of the experiment. A brief summary of progress over the five CASP experiments is provided. Related developments in the field are also described.",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tertiary structure prediction with ab initio methods
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Floudas06: the field separator after "pages" is required.
@Article{Floudas06,
  author  = "C.A. Floudas and H.K. Fung and S.R. McAllister and M. M{\"o}nnigmann and R. Rajgaria",
  title   = "Advances in Protein Structure Prediction and De Novo Protein Design: A review",
  journal = "Chem. Eng. Sci.",
  year    = "2006",
  volume  = "61",
  pages   = "966--988",
  comment = "Review paper for protein structure prediction and de novo protein design.",
}

@article{Klepeis02,
  AUTHOR = "J.L. Klepeis and C.A.
Floudas",
  TITLE   = "Ab initio prediction of helical segments in polypeptides",
  YEAR    = "2002",
  JOURNAL = "J. Comput. Chem.",
  NUMBER  = "2",
  VOLUME  = "23",
  PAGES   = "245--266",
  comment = "Method for predicting helical regions using detailed atomistic level modeling of overlapping oligopeptides.",
}

% In every entry of this group the field separator after "pages" is
% required; without it the entry fails to parse.
@Article{Klepeis03a,
  author  = "J.L. Klepeis and C.A. Floudas",
  title   = "Prediction of beta-sheet topology and disulfide bridges in polypeptides",
  journal = "J. Comput. Chem.",
  year    = "2003",
  volume  = "24",
  pages   = "191--208",
  comment = "Method for predicting beta-strand locations and b-sheet topology using optimization techniques",
}

@Article{Klepeis03b,
  author  = "J.L. Klepeis and C.A. Floudas",
  title   = "{ASTRO-FOLD}: A Combinatorial and Global Optimization Framework for Ab Initio Prediction of Three-Dimensional Structures of Proteins from the Amino Acid Sequence",
  journal = "Biophys. J.",
  year    = "2003",
  volume  = "85",
  pages   = "2119--2146",
  comment = "First principles framework for protein structure prediction",
}

% The citation key keeps its historical spelling so existing citations still resolve.
@article{Monningmann05,
  author  = "M. M{\"o}nnigmann and C.A. Floudas",
  title   = "Protein Loop Structure Prediction With Flexible Stem Geometries",
  journal = "Prot. Struct. Funct. Bioinf.",
  year    = "2005",
  volume  = "61",
  pages   = "748--762",
  comment = "Loop structure prediction method for loops with flexible stems. Uses dihedral angle sampling and introduces a novel use of clustering.",
}

@Article{Liwo02,
  author  = "A. Liwo and P. Arlukowicz and C. Czaplewski and S. Oldziej and J. Pillardy and H.A. Scheraga",
  title   = "A method for optimizing potential-energy functions by hierarchical design of the potential-energy landscape: Application to the {UNRES} force field",
  journal = "PNAS",
  year    = "2002",
  volume  = "99",
  pages   = "1937--1942",
  comment = "One of the more recent papers by Scheraga and co-workers detailing the use of the united residue (UNRES) approach for protein tertiary structure prediction",
}

@Article{Skolnick03,
  author = "J. Skolnick and Y. Zhang and A. K. Arakaki and A.
Kolinski and M. Boniecki and A. {Szil\'agyi} and D. Kihara",
  title   = "{TOUCHSTONE}: A Unified Approach to Protein Structure Prediction",
  journal = "Prot. Struct. Funct. Bioinf.",
  year    = "2003",
  volume  = "53",
  pages   = "469--479",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tertiary structure prediction with x-ray crystallography
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% The citation key (David03) is kept as-is so existing citations still
% resolve; the first author is A.M. Davis.
@Article{David03,
  author  = "A.M. Davis and S.J. Teague and G.J. Kleywegt",
  title   = "Application and limitations of {X}-ray crystallographic data in structure-based ligand and drug design",
  journal = "Angew. Chem., Int. Ed.",
  volume  = "42",
  year    = "2003",
  pages   = "2718--2736",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tertiary structure prediction with threading
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% "NP" is brace-protected so sentence-casing styles do not lowercase it.
@Article{Lathrop94,
  author  = "R.H. Lathrop",
  title   = "The protein threading problem with sequence amino acid interaction preferences is {NP}-complete",
  journal = "Protein Eng",
  volume  = "7",
  number  = "9",
  year    = "1994",
  pages   = "1059--1068",
}

@Article{Xu03,
  author  = "J. Xu and M. Li and D. Kim and Y. Xu",
  title   = "{RAPTOR}: Optimal Protein Threading by Linear Programming",
  journal = "J. Bioinf. Comput. Biol.",
  year    = "2003",
  volume  = "1",
  pages   = "95--117",
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Protein structure evaluation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{Eyal05,
  author   = "E. Eyal and S. Gerzon and V. Potapov and M. Edelman and V. Sobolev",
  title    = "The Limit of Accuracy of Protein Modeling: Influence of Crystal Packing on Protein Structure",
  journal  = "J. Mol. Biol.",
  volume   = "351",
  year     = "2005",
  pages    = "431--442",
  abstract = "The size of the protein database (PDB) makes it now feasible to arrive at statistical conclusions regarding structural effects of crystal packing.
These effects are relevant for setting upper practical limits of accuracy on protein modeling. Proteins whose crystals have more than one molecule in the asymmetric unit or whose structures were determined at least twice by X-ray crystallography were paired and their differences analyzed. We demonstrate a clear influence of crystal environment on protein structure, including backbone conformations, hinge-like motions and side-chain conformations. The positions of surface water molecules tend to be variable in different crystal environments while those of ligands are not. Structures determined by independent groups vary more than structures determined by the same authors. The use of different refinement methods is a major source for this effect. Our pair-wise analysis derives a practical limit to the accuracy of protein modeling. For different crystal forms, the limit of accuracy (Ca, root-mean-square deviation (RMSD)) isw0.8 A ° for the entire protein, which includes w0.3 A ° due to crystal packing. For organized secondary elements, the upper limit of Ca RMSD is 0.5Â-0.6 A ° while for loops or protein surface it reaches 1.0 A °. Twenty percent of exposed sidechains exhibit different c1C2 conformations with approximately half of the effect also resulting from crystal packing. Aweb based tool for analysis and graphic presentation of surface areas of crystal contacts is available (http://ligin.weizmann.ac.il/cryco).", } @Article{Wei99, author = "L. Wei and E.S. Huang and R.B. Altman", title = "Are predicted structures good enough to preserve functional sites?", journal = "Structure Fold Des", year = "1999", volume = "7", number = "6", pages = "643-650", comment = "``BACKGROUND: A principal goal of structure prediction is the elucidation of function. We have studied the ability of computed models to preserve the microenvironments of functional sites. 
In particular, 653 model structures of a calcium-binding protein (generated using an ab initio folding protocol) were analyzed, and the degree to which calcium-binding sites were recognizable was assessed. RESULTS: While some model structures preserve the calcium-binding microenvironments, many others, including some with low root mean square deviations (rmsds) from the crystal structure of the native protein, do not. There is a very weak correlation between the overall rmsd of a structure and the preservation of calcium-binding sites. Only when the quality of the model structure is high (rmsd less than 2 A for atoms in the 7 A local neighborhood around calcium) does the modeling of the binding sites become reliable. CONCLUSIONS: Protein structure prediction methods need to be assessed in terms of their preservation of functional sites. High-resolution structures are necessary for identifying binding sites such as calcium-binding sites.''", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Prediction of protein side-chain rotamers %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Bower99, author = "M. J. Bower and F.E. Cowen and R.L. Dunbrack", title = "Prediction of protein side-chain rotamers from a backbone-dependent rotamer library: a new homology modeling tool", journal = "J. Mol. Biol.", year = "1999", volume = "267", pages= "1268-1282", comment = "This is a reference for the Dunbrack rotamer library", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Analysis of residue conservation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Mayrose04, author = "I. Mayrose and D. Graur and N. Ben-Tal and T. Pupko", year = "2004", title = "Comparison of site-specific rate-inference methods for protein sequences: Bayesian methods are superior", journal = "Mol Biol Evol", volume = "21", pages = "1781-1791", } @Article{Glaser05, author = "F. Glaser and Y. 
Rosenberg and A. Kessel and T. Pupko and N. {Ben-Tal}", year = "2005", title = "The {ConSurf-HSSP} database: The mapping of evolutionary conservation among homologs onto PDB structures", journal = "PROTEINS: Structure, Function, and Bioinformatics", volume = "58", pages = "610-617", } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Not classified yet %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @Article{Aloy01, author = "P. Aloy and E. Querol and F.X. Aviles and M.J.E. Sternberg", title = "Automated structure-based prediction of functional sites in proteins: applications to assessing the validity of inheriting protein function from homology in genome annotation and to protein docking", year = "2001", journal = "J. Mol. Biol", volume = "311", pages = "395-408", } @InProceedings{TenEyck95, author = "L.F. Ten Eyck and J. Mandell and V.A. Roberts and M.E. Pique", title = "Surveying molecular interactions with DOT", booktitle = "1995 ACM/IEEE Supercomputing Conference", location = "New York", year = "1995", } @Article{Zhang05, author = "Y. Zhang and J. Skolnick", title = "The protein structure prediction problem could be solved using the current {PDB} library", journal = "PNAS", volume = "102", number = "4", year = "2005", pages = "1029-1034", } @Article{Glick02, author = "M. Glick and D.D. Robinson and G.H. Grant and W.G. Richards", title = "Identification of ligand binding sites on proteins using a multiscale approach", journal = "J. Am. Chem. Soc.", year = "2002", volume = "124", pages = "2337-2344", } @Article{Todd02, author = "A.E. Todd and C.A. Orengo and J.M. Thornton", title = "Plasticity of enzyme active sites", journal = "Trends in Biochemical Sciences", volume = "27", year = "2002", pages = "419-426", comment = "The expectation is that any similarity in reaction chemistry shared by enzyme homologues is mediated by common functional groups conserved through evolution. 
However, detailed enzyme studies have revealed the flexibility of many active sites, in that different functional groups, unconserved with respect to position in the primary sequence, mediate the same mechanistic role. Nevertheless, the catalytic atoms might be spatially equivalent. More rarely, the active sites have completely different locations in the protein scaffold. This variability could result from: (1) the hopping of functional groups from one position to another to optimize catalysis; (2) the independent specialization of a low-activity primordial enzyme in different phylogenetic lineages; (3) functional convergence after evolutionary divergence; or (4) circular permutation events.", } @Article{Pearl93, author = "L Pearl", title = "Similarity Of Active-Site Structures", journal = "Nature", volume = "362", year = "1993", pages = "24-24", comment = "This paper observes the similarities in the active sites of serine proteases", } @Article{Carlson02, author = "H.A. Carlson", title = "Protein Flexibility is an Important Component of Structure-Based Drug Discovery", journal = "Current Pharmaceutical Design", volume = "8", number = "17", year = "2002", pages = "1571-1578", abstract = "Receptor-based drug discovery can increase the novelty of a hit list over ligandbased models that are dependent on known inhibitors. It is important to explore new conformational and chemical space, but it is difficult to predict the plasticity of the binding site. Receptor-based methods are usually based on crystal structures of ligand-protein complexes, and hit lists can be restricted to the size and shape of the receptor model. Many improvements that accommodate protein flexibility in computer-aided drug design are being developed. 
These methods are reviewed with the focus being techniques that move beyond the rotation of side chains.The use of multiple protein structures is emerging as the best choice for including more realistic changes in protein conformation, but the optimal way to using these structures is still unclear.", } @Article{Betz02, author = "S.F. Betz and S.M. Baxter and J.S. Fetrow", title = "Function first: a powerful approach to post-genomic drug discovery", journal = "DDT", volume = "7", number = "16", month = "August", year = "2002", abstract = "In the post-genomic era, pharmaceutical researchers must evaluate vast numbers of protein sequences and formulate novel, intelligent strategies for identifying valid targets and discovering leads against them. The identification of small molecules that selectively target proteins or protein families will be aided by knowing the function and/or the structure of the target(s). By identifying protein function first, efficiencies are gained that allow subsequent focus of resources on particular protein families of interest. This article reviews current proteomic-scale approaches to identifying function as a way of accelerating lead discovery.", } @Article{Xie05, author = "L. Xie and P.E. Bourne", title = "Functional Coverage of the Human Genome by Existing Structures, Structural Genomics Targets and Homology Models", journal = "PLoS Comp Biol", volume = "1", number = "3", year = "2005", pages = "e31", abstract = "The sequencing of the human genome provides biologists with new opportunities to understand the molecular basis of physiological processes and disease states. To take full advantage of these opportunities the three-dimensional structure of the gene products are needed to provide the appropriate level of detail. 
Since protein structure determination lags behind protein sequence determination an important and on-going question becomes, what degree of coverage of the human proteome do we have from experimental structures and what can we infer by modeling? Or turning the question around, what structures do we need to determine (the most wanted list) to further our understanding of the human condition? This paper addresses these questions through integration of existing data resources correlated using comparative functional features, namely the gene ontology (GO) describing biochemical process, molecular function and cellular location for all types of proteins and the Enzyme Commission (EC) classification for enzymes. Genetic disease states are linked through the On-line Mendelian Inheritance in Man (OMIM) resource. The reader can ask their own questions of the resource at http://function.rcsb.org:8080/pdb/function_distribution/index.html. The resource should prove particularly useful to the structural genomics community as they strive to undertake large-scale structure determination with a goal of improving our understanding of protein functional space." } @Article{Nikolova04, author = "N. Nikolova and J. Jaworska", title = "Approaches to measure chemical similarity - A review", journal = "QSAR Comb. Sci.", year = "2004", volume = "22", pages = "1006-1026", } @Article{Hert04, author = "J. Hert and P. Willett and D.J. Wilton", title = "Comparison of fingerprint-based methods for virtual screening using multiple bioactive reference structures", journal = "J. Chem. Inf. Comput. Sci.", year = "2004", volume = "44", pages = "1177-1185", } @Article{Gelly06, author = "J. Gelly and A.G. {de Brevern} and S. 
Hazout", title = "{Protein Peeling:} an approach for splitting a 3D protein structure into compact fragments", journal = "Bioinformatics", volume = "22", number = "2", year = "2006", pages = "129-133", abstract = "Motivation: The object of this study is to propose a new method to identify small compact units that compose protein three-dimensional structures. These fragments, called protein units (PU)', are a new level of description to well understand and analyze the organization of protein structures. The method only works from the contact probability matrix, i.e. the inter Calpha-distances translated into probabilities. It uses the principle of conventional hierarchical clustering, leading to a series of nested partitions of the 3D structure. Every step aims at dividing optimally a unit into 2 or 3 subunits according to a criterion called partition index' assessing the structural independence of the subunits newly defined. Moreover, an entropy-derived squared correlation R is used for assessing globally the protein structure dissection. The method is compared to other splitting algorithms and shows relevant performance. Availability: An Internet server with dedicated tools is available at http://www.ebgm.jussieu.fr/~gelly/ Contact: debrevern@ebgm.jussieu.fr.", }