Publication list

2017

  • E. Audain, J. Uszkoreit, T. Sachsenberg, J. Pfeuffer, X. Liang, H. Hermjakob, A. Sanchez, M. Eisenacher, K. Reinert, D. L. Tabb, O. Kohlbacher, and Y. Perez-Riverol, “In-depth analysis of protein inference algorithms using multiple search engines and well-defined metrics,” Journal of Proteomics, vol. 150, pp. 170-182, 2017.
    [Bibtex]
    @article{fu_mi_publications1939,
    title = {In-depth analysis of protein inference algorithms using multiple search engines and well-defined metrics},
    month = {January},
    volume = {150},
    author = {Enrique Audain and Julian Uszkoreit and Timo Sachsenberg and Julianus Pfeuffer and Xiao Liang and Henning Hermjakob and Aniel Sanchez and Martin Eisenacher and Knut Reinert and David L. Tabb and Oliver Kohlbacher and Yasset Perez-Riverol},
    pages = {170--182},
    year = {2017},
    journal = {Journal of Proteomics},
    publisher = {Elsevier},
    url = {http://publications.imp.fu-berlin.de/1939/},
    abstract = {In mass spectrometry-based shotgun proteomics, protein identifications are usually the desired result. However, most of the analytical methods are based on the identification of reliable peptides and not the direct identification of intact proteins. Thus, assembling peptides identified from tandem mass spectra into a list of proteins, referred to as protein inference, is a critical step in proteomics research. Currently, different protein inference algorithms and tools are available for the proteomics community. Here, we evaluated five software tools for protein inference (PIA, ProteinProphet, Fido, ProteinLP, MSBayesPro) using three popular database search engines: Mascot, X!Tandem, and MS-GF+. All the algorithms were evaluated using a highly customizable KNIME workflow using four different public datasets with varying complexities (different sample preparation, species and analytical instruments). We defined a set of quality control metrics to evaluate the performance of each combination of search engines, protein inference algorithm, and parameters on each dataset. We show that the results for complex samples vary not only regarding the actual numbers of reported protein groups but also concerning the actual composition of groups. Furthermore, the robustness of reported proteins when using databases of differing complexities is strongly dependent on the applied inference algorithm. Finally, merging the identifications of multiple search engines does not necessarily increase the number of reported proteins, but does increase the number of peptides per protein and thus can generally be recommended.}
    }

2016

  • S. Canzar, S. Andreotti, D. Weese, K. Reinert, and G. W. Klau, “CIDANE: comprehensive isoform discovery and abundance estimation,” Genome Biology, vol. 17, iss. 1, 2016.
    [Bibtex]
    @article{fu_mi_publications1830,
    title = {CIDANE: comprehensive isoform discovery and abundance estimation},
    volume = {17},
    month = {January},
    number = {1},
    author = {S. Canzar and S. Andreotti and D. Weese and K. Reinert and G. W. Klau},
    year = {2016},
    journal = {Genome Biology},
    publisher = {BioMed Central, Springer Science+Business Media},
    url = {http://publications.imp.fu-berlin.de/1830/},
    abstract = {We present CIDANE, a novel framework for genome-based transcript reconstruction and quantification from RNA-seq reads. CIDANE assembles transcripts efficiently with significantly higher sensitivity and precision than existing tools. Its algorithmic core not only reconstructs transcripts ab initio, but also allows the use of the growing annotation of known splice sites, transcription start and end sites, or full-length transcripts, which are available for most model organisms. CIDANE supports the integrated analysis of RNA-seq and additional gene-boundary data and recovers splice junctions that are invisible to other methods. CIDANE is available at http://ccb.jhu.edu/software/cidane/.}
    }
  • M. Jäger, M. Schubach, T. Zemojtel, K. Reinert, D. M. Church, and P. N. Robinson, “Alternate-locus aware variant calling in whole genome sequencing,” Genome Medicine, vol. 8, iss. 1, 2016.
    [Bibtex]
    @article{fu_mi_publications2004,
    journal = {Genome Medicine},
    publisher = {BioMed Central (Springer Nature)},
    title = {Alternate-locus aware variant calling in whole genome sequencing},
    month = {December},
    volume = {8},
    author = {Marten J{\"a}ger and Max Schubach and Tomasz Zemojtel and Knut Reinert and Deanna M. Church and Peter N. Robinson},
    number = {1},
    year = {2016},
    abstract = {
    Background
    The last two human genome assemblies have extended the previous linear golden-path paradigm of the human genome to a graph-like model to better represent regions with a high degree of structural variability. The new model offers opportunities to improve the technical validity of variant calling in whole-genome sequencing (WGS).
    Methods
    We developed an algorithm that analyzes the patterns of variant calls in the 178 structurally variable regions of the GRCh38 genome assembly, and infers whether a given sample is most likely to contain sequences from the primary assembly, an alternate locus, or their heterozygous combination at each of these 178 regions. We investigate 121 in-house WGS datasets that have been aligned to the GRCh37 and GRCh38 assemblies.
    Results
    We show that stretches of sequences that are largely but not entirely identical between the primary assembly and an alternate locus can result in multiple variant calls against regions of the primary assembly. In WGS analysis, this results in characteristic and recognizable patterns of variant calls at positions that we term alignable scaffold-discrepant positions (ASDPs). In 121 in-house genomes, on average 51.8{$\pm$}3.8 of the 178 regions were found to correspond best to an alternate locus rather than the primary assembly sequence, and filtering these genomes with our algorithm led to the identification of 7863 variant calls per genome that colocalized with ASDPs. Additionally, we found that 437 of 791 genome-wide association study hits located within one of the regions corresponded to ASDPs.
    Conclusions
    Our algorithm uses the information contained in the 178 structurally variable regions of the GRCh38 genome assembly to avoid spurious variant calls in cases where samples contain an alternate locus rather than the corresponding segment of the primary assembly. These results suggest the great potential of fully incorporating the resources of graph-like genome assemblies into variant calling, but also underscore the importance of developing computational resources that will allow a full reconstruction of the genotype in personal genomes. Our algorithm is freely available at https://github.com/charite/asdpex.},
    url = {http://publications.imp.fu-berlin.de/2004/}
    }
  • T. Marschall, K. Reinert, and (59 authors in total) others, “Computational pan-genomics: status, promises and challenges,” Briefings in Bioinformatics, 2016.
    [Bibtex]
    @article{fu_mi_publications1981,
    publisher = {Oxford Journals},
    journal = {Briefings in Bioinformatics},
    title = {Computational pan-genomics: status, promises and challenges},
    month = {October},
    author = {T. Marschall and K. Reinert and (59 authors in total) others},
    year = {2016},
    url = {http://publications.imp.fu-berlin.de/1981/},
    abstract = {Many disciplines, from human genetics and oncology to plant breeding, microbiology and virology, commonly face the challenge of analyzing rapidly increasing numbers of genomes. In case of Homo sapiens, the number of sequenced genomes will approach hundreds of thousands in the next few years. Simply scaling up established bioinformatics pipelines will not be sufficient for leveraging the full potential of such rich genomic data sets. Instead, novel, qualitatively different computational methods and paradigms are needed. We will witness the rapid extension of computational pan-genomics, a new sub-area of research in computational biology. In this article, we generalize existing definitions and understand a pan-genome as any collection of genomic sequences to be analyzed jointly or to be used as a reference. We examine already available approaches to construct and use pan-genomes, discuss the potential benefits of future technologies and methodologies and review open challenges from the vantage point of the above-mentioned biological disciplines. As a prominent example for a computational paradigm shift, we particularly highlight the transition from the representation of reference genomes as strings to representations as graphs. We outline how this and other challenges from different application domains translate into common computational problems, point out relevant bioinformatics techniques and identify open problems in computer science. With this review, we aim to increase awareness that a joint approach to computational pan-genomics can help address many of the problems currently faced in various domains. }
    }
  • B. Vatansever, A. Muñoz, C. L. Klein, and K. Reinert, “Development and optimisation of a generic micro LC-ESI-MS method for the qualitative and quantitative determination of 30-mer toxic gliadin peptides in wheat flour for food analysis,” Analytical and Bioanalytical Chemistry, pp. 1-9, 2016.
    [Bibtex]
    @article{fu_mi_publications1976,
    publisher = {Springer Berlin Heidelberg},
    journal = {Analytical and Bioanalytical Chemistry},
    title = {Development and optimisation of a generic micro LC-ESI-MS method for the qualitative and quantitative determination of 30-mer toxic gliadin peptides in wheat flour for food analysis},
    month = {October},
    pages = {1--9},
    author = {B. Vatansever and A. Mu{\~n}oz and C. L. Klein and K. Reinert},
    year = {2016},
    abstract = {We sometimes see manufactured bakery products on the market which are labelled as being gluten free. Why is the content of such gluten proteins of importance for the fabrication of bakery industry and for the products? The gluten proteins represent up to 80 \% of wheat proteins, and they are conventionally subdivided into gliadins and glutenins. Gliadins belong to the proline and glutamine-rich prolamin family. Its role in human gluten intolerance, as a consequence of its harmful effects, is well documented in the scientific literature. The only known therapy so far is a gluten-free diet, and hence, it is important to develop robust and reliable analytical methods to quantitatively assess the presence of the identified peptides causing the so-called coeliac disease. This work describes the development of a new, fast and robust micro ion pair-LC-MS analytical method for the qualitative and quantitative determination of 30-mer toxic gliadin peptides in wheat flour. The use of RapiGest SF as a denaturation reagent prior to the enzymatic digestion showed to shorten the measuring time. During the optimisation of the enzymatic digestion step, the best 30-mer toxic peptide was identified from the maximum recovery after 3 h of digestion time. The lower limit of quantification was determined to be 0.25 ng/{\ensuremath{\mu}}L. The method has shown to be linear for the selected concentration range of 0.25--3.0 ng/{\ensuremath{\mu}}L. The uncertainty related to reproducibility of measurement procedure, excluding the extraction step, has shown to be 5.0 \% (N = 12). Finally, this method was successfully applied to the quantification of 30-mer toxic peptides from commercial wheat flour with an overall uncertainty under reproducibility conditions of 6.4 \% including the extraction of the gliadin fraction. The results were always expressed as the average of the values from all standard concentrations. Subsequently, the final concentration of the 30-mer toxic peptide in the flour was calculated and expressed in milligrams per gram unit. The determined, calculated concentration of the 30-mer toxic peptide in the flour was found to be 1.29 {$\pm$} 0.37 {\ensuremath{\mu}}g/g in flour (N = 25, sy = 545,075, f = 25 - 2 (t = 2.069), P = 95 \%, two-sided).},
    url = {http://publications.imp.fu-berlin.de/1976/}
    }

2015

  • S. Aiche, T. Sachsenberg, E. Kenar, M. Walzer, B. Wiswedel, T. Kristl, M. Boyles, A. Duschl, C. G. Huber, M. R. Berthold, K. Reinert, and O. Kohlbacher, “Workflows for automated downstream data analysis and visualization in large-scale computational mass spectrometry,” Proteomics, vol. 15, iss. 8, pp. 1443-1447, 2015.
    [Bibtex]
    @article{fu_mi_publications1505,
    author = {S. Aiche and T. Sachsenberg and E. Kenar and M. Walzer and B. Wiswedel and T. Kristl and M. Boyles and A. Duschl and C. G. Huber and M. R. Berthold and K. Reinert and O. Kohlbacher},
    pages = {1443--1447},
    number = {8},
    year = {2015},
    title = {Workflows for automated downstream data analysis and visualization in large-scale computational mass spectrometry},
    month = {April},
    volume = {15},
    publisher = {Wiley VCH},
    journal = {PROTEOMICS},
    abstract = {MS-based proteomics and metabolomics are rapidly evolving research fields driven by the development of novel instruments, experimental approaches, and analysis methods. Monolithic analysis tools perform well on single tasks but lack the flexibility to cope with the constantly changing requirements and experimental setups. Workflow systems, which combine small processing tools into complex analysis pipelines, allow custom-tailored and flexible data-processing workflows that can be published or shared with collaborators. In this article, we present the integration of established tools for computational MS from the open-source software framework OpenMS into the workflow engine Konstanz Information Miner (KNIME) for the analysis of large datasets and production of high-quality visualizations. We provide example workflows to demonstrate combined data processing and visualization for three diverse tasks in computational MS: isobaric mass tag based quantitation in complex experimental setups, label-free quantitation and identification of metabolites, and quality control for proteomics experiments.},
    url = {http://publications.imp.fu-berlin.de/1505/}
    }
  • M. Holtgrewe, L. Kuchenbecker, and K. Reinert, “Methods for the detection and assembly of novel sequence in high-throughput sequencing data,” Bioinformatics, vol. 31, iss. 12, pp. 1904-1912, 2015.
    [Bibtex]
    @article{fu_mi_publications1506,
    number = {12},
    pages = {1904--1912},
    author = {M. Holtgrewe and L. Kuchenbecker and K. Reinert},
    year = {2015},
    journal = {Bioinformatics},
    title = {Methods for the Detection and Assembly of Novel Sequence in High-Throughput Sequencing Data},
    volume = {31},
    abstract = {Motivation:
    Large insertions of novel sequence are an important type of structural variants. Previous studies used traditional de novo assemblers for assembling non-mapping high-throughput sequencing (HTS) or capillary reads and then tried to anchor them in the reference using paired read information.
    Results:
    We present approaches for detecting insertion breakpoints and targeted assembly of large insertions from HTS paired data: BASIL and ANISE. On near identity repeats that are hard for assemblers, ANISE employs a repeat resolution step. This results in far better reconstructions than obtained by the compared methods. On simulated data, we found our insert assembler to be competitive with the de novo assemblers ABYSS and SGA while yielding already anchored inserted sequence as opposed to unanchored contigs as from ABYSS/SGA. On real-world data, we detected novel sequence in a human individual and thoroughly validated the assembled sequence. ANISE was found to be superior to the competing tool MindTheGap on both simulated and real-world data.
    Availability and implementation: ANISE and BASIL are available for download at http://www.seqan.de/projects/herbarium under a permissive open source license.
    Contact: manuel.holtgrewe@fu-berlin.de or knut.reinert@fu-berlin.de},
    url = {http://publications.imp.fu-berlin.de/1506/}
    }
  • J. Hu and K. Reinert, “LocalAli: an evolutionary-based local alignment approach to identify functionally conserved modules in multiple networks,” Bioinformatics, vol. 30, iss. 1, pp. 363-372, 2015.
    [Bibtex]
    @article{fu_mi_publications1457,
    author = {J. Hu and K. Reinert},
    pages = {363--372},
    number = {1},
    year = {2015},
    title = {LocalAli: An Evolutionary-based Local Alignment Approach to Identify Functionally Conserved Modules in Multiple Networks.},
    volume = {30},
    journal = {Bioinformatics},
    publisher = {Oxford University Press},
    abstract = {MOTIVATION: Sequences and protein interaction data are of significance to understand the underlying molecular mechanism of organisms. Local network alignment is one of key systematic ways for predicting protein functions, identifying functional modules, and understanding the phylogeny from these data. Most of currently existing tools, however, encounter their limitations which are mainly concerned with scoring scheme, speed and scalability. Therefore, there are growing demands for sophisticated network evolution models and efficient local alignment algorithms.
    RESULTS: We developed a fast and scalable local network alignment tool so-called LocalAli for the identification of functionally conserved modules in multiple networks. In this algorithm, we firstly proposed a new framework to reconstruct the evolution history of conserved modules based on a maximum-parsimony evolutionary model. By relying on this model, LocalAli facilitates interpretation of resulting local alignments in terms of conserved modules which have been evolved from a common ancestral module through a series of evolutionary events. A meta-heuristic method simulated annealing was used to search for the optimal or near-optimal inner nodes (i.e. ancestral modules) of the evolutionary tree. To evaluate the performance and the statistical significance, LocalAli were tested on a total of 26 real datasets and 1040 randomly generated datasets. The results suggest that LocalAli outperforms all existing algorithms in terms of coverage, consistency and scalability, meanwhile retains a high precision in the identification of functionally coherent subnetworks.
    AVAILABILITY: The source code and test datasets are freely available for download under the GNU GPL v3 license at https://code.google.com/p/localali/.
    CONTACT: jialu.hu@fu-berlin.de or knut.reinert@fu-berlin.de.},
    url = {http://publications.imp.fu-berlin.de/1457/}
    }
  • L. Kuchenbecker, M. Nienen, J. Hecht, A. U. Neumann, N. Babel, K. Reinert, and P. N. Robinson, “IMSEQ – a fast and error aware approach to immunogenetic sequence analysis,” Bioinformatics, vol. 31, iss. 18, pp. 2963-2971, 2015.
    [Bibtex]
    @article{fu_mi_publications1551,
    title = {IMSEQ - a fast and error aware approach to immunogenetic sequence analysis},
    volume = {31},
    pages = {2963--2971},
    number = {18},
    author = {L. Kuchenbecker and M. Nienen and J. Hecht and A. U. Neumann and N. Babel and K. Reinert and P. N. Robinson},
    year = {2015},
    journal = {Bioinformatics},
    publisher = {Oxford University Press (online advanced access)},
    abstract = {Motivation: Recombined T and B cell receptor repertoires are increasingly being studied using next generation sequencing (NGS) in order to interrogate the repertoire composition as well as changes in the distribution of receptor clones under different physiological and disease states. This type of analysis requires efficient and unambiguous clonotype assignment to a large number of NGS read sequences, including the identification of the incorporated V and J gene segments and the CDR3 sequence. Current tools have deficits with respect to performance, accuracy and documentation of their underlying algorithms and usage.
    Results: We present IMSEQ, a method to derive clonotype repertoires from next generation sequencing data with sophisticated routines for handling errors stemming from PCR and sequencing artefacts. The application can handle different kinds of input data originating from single- or paired-end sequencing in different configurations and is generic regarding the species and gene of interest. We have carefully evaluated our method with simulated and real world data and show that IMSEQ is superior to other tools with respect to its clonotyping as well as standalone error correction and runtime performance.
    Availability: IMSEQ was implemented in C++ using the SeqAn library for efficient sequence analysis. It is freely available under the GPLv2 open source license and can be downloaded at www.imtools.org. },
    url = {http://publications.imp.fu-berlin.de/1551/}
    }
  • K. Reinert, B. Langmead, D. Weese, and D. J. Evers, “Alignment of next-generation sequencing reads,” Annual Review of Genomics and Human Genetics, vol. 16, iss. 1, pp. 133-151, 2015.
    [Bibtex]
    @article{fu_mi_publications1544,
    journal = {Annual Review of Genomics and Human Genetics},
    volume = {16},
    month = {May},
    title = {Alignment of Next-Generation Sequencing Reads},
    year = {2015},
    number = {1},
    pages = {133--151},
    author = {K. Reinert and B. Langmead and D. Weese and D.J. Evers},
    abstract = {High-throughput DNA sequencing has considerably changed the possibilities for conducting biomedical research by measuring billions of short DNA or RNA fragments. A central computational problem, and for many applications a first step, consists of determining where the fragments came from in the original genome. In this article, we review the main techniques for generating the fragments, the main applications, and the main algorithmic ideas for computing a solution to the read alignment problem. In addition, we describe pitfalls and difficulties connected to determining the correct positions of reads.},
    url = {http://publications.imp.fu-berlin.de/1544/}
    }
  • L. Schultz, M. -G. Zurich, M. Culot, A. da Costa, C. Landry, P. Bellwon, T. Kristl, K. Hörmann, S. Ruzek, S. Aiche, K. Reinert, C. Bielow, F. Gosselet, R. Cecchelli, C. G. Huber, O. H. -U. Schroeder, A. Gramowski-Voss, D. G. Weiss, and A. Bal-Price, “Evaluation of drug-induced neurotoxicity based on metabolomics, proteomics and electrical activity measurements in complementary CNS in vitro models,” Toxicology in Vitro, vol. 30, iss. 1, pp. 138-165, 2015.
    [Bibtex]
    @article{fu_mi_publications1736,
    journal = {Toxicology in Vitro},
    note = {Online publication: May 2015},
    publisher = {Elsevier B.V.},
    month = {December},
    volume = {30},
    title = {Evaluation of drug-induced neurotoxicity based on metabolomics, proteomics and electrical activity measurements in complementary CNS in vitro models},
    year = {2015},
    author = {L. Schultz and M.-G. Zurich and M. Culot and A. da Costa and C. Landry and P. Bellwon and T. Kristl and K. H{\"o}rmann and S. Ruzek and S. Aiche and K. Reinert and C. Bielow and F. Gosselet and R. Cecchelli and C. G. Huber and O. H.-U. Schroeder and A. Gramowski-Voss and D. G. Weiss and A. Bal-Price},
    number = {1},
    pages = {138--165},
    keywords = {In vitro blood brain barrier; MEA; Neuronal network culture; OMICs; Drug development; 3D brain culture},
    abstract = {The present study was performed in an attempt to develop an in vitro integrated testing strategy (ITS) to evaluate drug-induced neurotoxicity. A number of endpoints were analyzed using two complementary brain cell culture models and an in vitro blood-brain barrier (BBB) model after single and repeated exposure treatments with selected drugs that covered the major biological, pharmacological and neuro-toxicological responses. Furthermore, four drugs (diazepam, cyclosporine A, chlorpromazine and amiodarone) were tested more in depth as representatives of different classes of neurotoxicants, inducing toxicity through different pathways of toxicity.
    The developed in vitro BBB model allowed detection of toxic effects at the level of BBB and evaluation of drug transport through the barrier for predicting free brain concentrations of the studied drugs. The measurement of neuronal electrical activity was found to be a sensitive tool to predict the neuroactivity and neurotoxicity of drugs after acute exposure. The histotypic 3D re-aggregating brain cell cultures, containing all brain cell types, were found to be well suited for OMICs analyses after both acute and long term treatment.
    The obtained data suggest that an in vitro ITS based on the information obtained from BBB studies and combined with metabolomics, proteomics and neuronal electrical activity measurements performed in stable in vitro neuronal cell culture systems, has high potential to improve current in vitro drug-induced neurotoxicity evaluation.
    Abbreviations:
    BBB, blood brain barrier; DMSO, dimethylsulfoxide; EC, endothelial cells; DIV, day in vitro; IPA, Ingenuity Pathway Analysis; ITS, integrated testing strategy; LY, Lucifer Yellow; MEA, micro-electrode array; RH, Ringer HEPES medium; SPSS, Statistical Package for the Social Sciences
    },
    url = {http://publications.imp.fu-berlin.de/1736/}
    }
  • A. Wilmes, C. Bielow, C. Ranninger, P. Bellwon, L. Aschauer, A. Limonciel, H. Chassaigne, T. Kristl, S. Aiche, C. G. Huber, C. Guillou, P. Hewitt, M. O. Leonard, W. Dekant, F. Bois, and P. Jennings, “Mechanism of cisplatin proximal tubule toxicity revealed by integrating transcriptomics, proteomics, metabolomics and biokinetics,” Toxicology in Vitro, vol. 30, iss. 1, Part A, pp. 117-127, 2015.
    [Bibtex]
    @article{fu_mi_publications1488,
    author = {Anja Wilmes and Chris Bielow and Christina Ranninger and Patricia Bellwon and Lydia Aschauer and Alice Limonciel and Hubert Chassaigne and Theresa Kristl and Stephan Aiche and Christian G. Huber and Claude Guillou and Philipp Hewitt and Martin O. Leonard and Wolfgang Dekant and Frederic Bois and Paul Jennings},
    number = {1, Part A},
    pages = {117--127},
    year = {2015},
    title = {Mechanism of cisplatin proximal tubule toxicity revealed by integrating transcriptomics, proteomics, metabolomics and biokinetics},
    month = {December},
    volume = {30},
    journal = {Toxicology in Vitro},
    publisher = {Elsevier B.V.},
    abstract = {Cisplatin is one of the most widely used chemotherapeutic agents for the treatment of solid tumours. The major dose-limiting factor is nephrotoxicity, in particular in the proximal tubule. Here, we use an integrated omics approach, including transcriptomics, proteomics and metabolomics coupled to biokinetics to identify cell stress response pathways induced by cisplatin. The human renal proximal tubular cell line RPTEC/TERT1 was treated with sub-cytotoxic concentrations of cisplatin (0.5 and 2{\ensuremath{\mu}}M) in a daily repeat dose treating regime for up to 14 days. Biokinetic analysis showed that cisplatin was taken up from the basolateral compartment, transported to the apical compartment, and accumulated in cells over time. This is in line with basolateral uptake of cisplatin via organic cation transporter 2 and bioactivation via gamma-glutamyl transpeptidase located on the apical side of proximal tubular cells. Cisplatin affected several pathways including p53 signalling, Nrf2 mediated oxidative stress response, mitochondrial processes, mTOR and AMPK signalling. In addition, we identified novel pathways changed by cisplatin, including eIF2 signalling, actin nucleation via the ARP/WASP complex and regulation of cell polarization. In conclusion, using an integrated omic approach together with biokinetics we have identified both novel and established mechanisms of cisplatin toxicity.},
    url = {http://publications.imp.fu-berlin.de/1488/}
    }

2014

  • H. Hauswedell, J. Singer, and K. Reinert, “Lambda: the local aligner for massive biological data,” Bioinformatics (Oxford, England), vol. 30, iss. 17, pp. i349–i355, 2014.
    [Bibtex]
    @article{fu_mi_publications1453,
    journal = {Bioinformatics (Oxford, England)},
    publisher = {Oxford University Press},
    volume = {30},
    month = {September},
    title = {Lambda: the local aligner for massive biological data},
    year = {2014},
    number = {17},
    pages = {i349--i355},
    author = {H. Hauswedell and J. Singer and K. Reinert},
    url = {http://publications.imp.fu-berlin.de/1453/},
    abstract = {MOTIVATION: Next-generation sequencing technologies produce unprecedented amounts of data, leading to completely new research fields. One of these is metagenomics, the study of large-size DNA samples containing a multitude of diverse organisms. A key problem in metagenomics is to functionally and taxonomically classify the sequenced DNA, to which end the well-known BLAST program is usually used. But BLAST has dramatic resource requirements at metagenomic scales of data, imposing a high financial or technical burden on the researcher. Multiple attempts have been made to overcome these limitations and present a viable alternative to BLAST. RESULTS: In this work we present Lambda, our own alternative for BLAST in the context of sequence classification. In our tests, Lambda often outperforms the best tools at reproducing BLAST's results and is the fastest compared with the current state of the art at comparable levels of sensitivity.
    AVAILABILITY AND IMPLEMENTATION: Lambda was implemented in the SeqAn open-source C++ library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/lambda.
    CONTACT: hannes.hauswedell@fu-berlin.de
    SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}
    }
  • J. Hu, B. Kehr, and K. Reinert, “NetCoffee: a fast and accurate global alignment approach to identify functionally conserved proteins in multiple networks,” Bioinformatics, vol. 30, iss. 4, pp. 540-548, 2014.
    [Bibtex]
    @article{fu_mi_publications1399,
    title = {NetCoffee: a fast and accurate global alignment approach to identify functionally conserved proteins in multiple networks},
    month = {February},
    volume = {30},
    author = {J. Hu and B. Kehr and K. Reinert},
    number = {4},
    pages = {540--548},
    year = {2014},
    journal = {Bioinformatics},
    publisher = {Oxford University Press},
    abstract = {Motivation: Owing to recent advancements in high-throughput technologies, protein--protein interaction networks of more and more species become available in public databases. The question of how to identify functionally conserved proteins across species attracts a lot of attention in computational biology. Network alignments provide a systematic way to solve this problem. However, most existing alignment tools encounter limitations in tackling this problem. Therefore, the demand for faster and more efficient alignment tools is growing.
    Results: We present a fast and accurate algorithm, NetCoffee, which allows to find a global alignment of multiple protein--protein interaction networks. NetCoffee searches for a global alignment by maximizing a target function using simulated annealing on a set of weighted bipartite graphs that are constructed using a triplet approach similar to T-Coffee. To assess its performance, NetCoffee was applied to four real datasets. Our results suggest that NetCoffee remedies several limitations of previous algorithms, outperforms all existing alignment tools in terms of speed and nevertheless identifies biologically meaningful alignments.
    Availability: The source code and data are freely available for download under the GNU GPL v3 license at https://code.google.com/p/netcoffee/. },
    url = {http://publications.imp.fu-berlin.de/1399/}
    }
  • B. Kehr, K. Trappe, M. Holtgrewe, and K. Reinert, “Genome alignment with graph data structures: a comparison,” BMC Bioinformatics, vol. 15, 2014.
    [Bibtex]
    @article{fu_mi_publications1437,
    volume = {15},
    month = {April},
    journal = {BMC Bioinformatics},
    publisher = {BioMed Central},
    title = {Genome alignment with graph data structures: a comparison},
    year = {2014},
    author = {B. Kehr and K. Trappe and M. Holtgrewe and K. Reinert},
    url = {http://publications.imp.fu-berlin.de/1437/},
    abstract = {Background
    Recent advances in rapid, low-cost sequencing have opened up the opportunity to study complete genome sequences. The computational approach of multiple genome alignment allows investigation of evolutionarily related genomes in an integrated fashion, providing a basis for downstream analyses such as rearrangement studies and phylogenetic inference.
    Graphs have proven to be a powerful tool for coping with the complexity of genome-scale sequence alignments. The potential of graphs to intuitively represent all aspects of genome alignments led to the development of graph-based approaches for genome alignment. These approaches construct a graph from a set of local alignments, and derive a genome alignment through identification and removal of graph substructures that indicate errors in the alignment.
    Results
    We compare the structures of commonly used graphs in terms of their abilities to represent alignment information. We describe how the graphs can be transformed into each other, and identify and classify graph substructures common to one or more graphs. Based on previous approaches, we compile a list of modifications that remove these substructures.
    Conclusion
    We show that crucial pieces of alignment information, associated with inversions and duplications, are not visible in the structure of all graphs. If we neglect vertex or edge labels, the graphs differ in their information content. Still, many ideas are shared among all graph-based approaches. Based on these findings, we outline a conceptual framework for graph-based genome alignment that can assist in the development of future genome alignment tools. }
    }
  • V. Neu, C. Bielow, K. Reinert, and C. G. Huber, “Ultrahigh-performance liquid chromatography-ultraviolet absorbance detection-high-resolution-mass spectrometry combined with automated data processing for studying the kinetics of oxidative thermal degradation of thyroxine in the solid state,” Journal of Chromatography A, vol. 1371, pp. 196-203, 2014.
    [Bibtex]
    @article{fu_mi_publications1463,
    month = {December},
    volume = {1371},
    title = {Ultrahigh-performance liquid chromatography-ultraviolet absorbance detection-high-resolution-mass spectrometry combined with automated data processing for studying the kinetics of oxidative thermal degradation of thyroxine in the solid state},
    journal = {Journal of Chromatography A},
    year = {2014},
    author = {V. Neu and C. Bielow and K. Reinert and C. G. Huber},
    pages = {196--203},
    url = {http://publications.imp.fu-berlin.de/1463/},
    keywords = {Thyroxine; Ultrahigh-performance liquid chromatography; Electrospray ionization Orbitrap mass spectrometry; Kinetics; Drug degradation; Bioinformatics},
    abstract = {Levothyroxine as active pharmaceutical ingredient of formulations used for the treatment of hypothyroidism is distributed worldwide and taken by millions of people. An important issue in terms of compound stability is its capability to react with ambient oxygen, especially in case of long term compound storage at elevated temperature. In this study we demonstrate that ultrahigh-performance liquid chromatography coupled to UV spectrometry and high-resolution mass spectrometry (UHPLC-UV-HRMS) represent very useful approaches to investigate the influence of ambient oxygen on the degradation kinetics of levothyroxine in the solid state at enhanced degradation conditions. Moreover, the impurity pattern of oxidative degradation of levothyroxine is elucidated and classified with respect to degradation kinetics at different oxygen levels. Kinetic analysis of thyroxine bulk material at 100 {\ensuremath{^\circ}}C reveals bi-phasic degradation kinetics with a distinct change in degradation phases dependent on the availability of oxygen. The results clearly show that contact of the bulk material to ambient oxygen is a key factor for fast compound degradation. Furthermore, the combination of time-resolved HRMS data and automated data processing is shown to allow insights into the kinetics and mechanism of impurity formation on individual compound basis. By comparing degradation profiles, four main classes of profiles linked to reaction pathways of thyroxine degradation were identifiable. Finally, we show the capability of automated data processing for the matching of different stressing conditions, in order to extract information about mechanistic similarities. As a result, degradation kinetics is influenced by factors like availability of oxygen, stressing time, or stressing temperature, while the degradation mechanisms appear to be conserved.}
    }
  • R. Rahn, D. Weese, and K. Reinert, “Journaled string tree–a scalable data structure for analyzing thousands of similar genomes on your laptop,” Bioinformatics, 2014.
    [Bibtex]
    @article{fu_mi_publications1448,
    month = {July},
    journal = {Bioinformatics},
    title = {Journaled string tree--a scalable data structure for analyzing thousands of similar genomes on your laptop},
    year = {2014},
    author = {R. Rahn and D. Weese and K. Reinert},
    abstract = {Motivation: Next-generation sequencing (NGS) has revolutionized biomedical research in the past decade and led to a continuous stream of developments in bioinformatics, addressing the need for fast and space-efficient solutions for analyzing NGS data. Often researchers need to analyze a set of genomic sequences that stem from closely related species or are indeed individuals of the same species. Hence, the analyzed sequences are similar. For analyses where local changes in the examined sequence induce only local changes in the results, it is obviously desirable to examine identical or similar regions not repeatedly.
    Results: In this work, we provide a datatype that exploits data parallelism inherent in a set of similar sequences by analyzing shared regions only once. In real-world experiments, we show that algorithms that otherwise would scan each reference sequentially can be speeded up by a factor of 115.
    Availability: The data structure and associated tools are publicly available at http://www.seqan.de/projects/jst and are part of SeqAn, the C++ template library for sequence analysis.
    Contact: rene.rahn@fu-berlin.de},
    url = {http://publications.imp.fu-berlin.de/1448/}
    }
  • M. H. Schulz, D. Weese, M. Holtgrewe, V. Dimitrova, S. Niu, K. Reinert, and H. Richard, “Fiona: a parallel and automatic strategy for read error correction,” Bioinformatics, vol. 30, iss. 17, pp. i356–i363, 2014.
    [Bibtex]
    @article{fu_mi_publications1451,
    year = {2014},
    number = {17},
    pages = {i356--i363},
    author = {M. H. Schulz and D. Weese and M. Holtgrewe and V. Dimitrova and S. Niu and K. Reinert and H. Richard},
    volume = {30},
    journal = {Bioinformatics},
    title = {Fiona: a parallel and automatic strategy for read error correction},
    abstract = {Motivation: Automatic error correction of high-throughput sequencing data can have a dramatic impact on the amount of usable base pairs and their quality. It has been shown that the performance of tasks such as de novo genome assembly and SNP calling can be dramatically improved after read error correction. While a large number of methods specialized for correcting substitution errors as found in Illumina data exist, few methods for the correction of indel errors, common to technologies like 454 or Ion Torrent, have been proposed. Results: We present Fiona, a new stand-alone read error-correction method. Fiona provides a new statistical approach for sequencing error detection and optimal error correction and estimates its parameters automatically. Fiona is able to correct substitution, insertion and deletion errors and can be applied to any sequencing technology. It uses an efficient implementation of the partial suffix array to detect read overlaps with different seed lengths in parallel. We tested Fiona on several real datasets from a variety of organisms with different read lengths and compared its performance with state-of-the-art methods. Fiona shows a constantly higher correction accuracy over a broad range of datasets from 454 and Ion Torrent sequencers, without compromise in speed. Conclusion: Fiona is an accurate parameter-free read error-correction method that can be run on inexpensive hardware and can make use of multicore parallelization whenever available. Fiona was implemented using the SeqAn library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/fiona. Contact: mschulz@mmci.uni-saarland.de or hugues.richard@upmc.fr. Supplementary information: Supplementary data are available at Bioinformatics online.},
    url = {http://publications.imp.fu-berlin.de/1451/}
    }
  • K. Trappe, A. -K. Emde, H. -C. Ehrlich, and K. Reinert, “Gustaf: detecting and correctly classifying SVs in the NGS twilight zone,” Bioinformatics, vol. 30, iss. 24, pp. 3484-3490, 2014.
    [Bibtex]
    @article{fu_mi_publications1455,
    publisher = {Oxford University Press},
    journal = {Bioinformatics},
    year = {2014},
    author = {K. Trappe and A.-K. Emde and H.-C. Ehrlich and K. Reinert},
    pages = {3484--3490},
    number = {24},
    month = {July},
    volume = {30},
    title = {Gustaf: Detecting and correctly classifying SVs in the NGS twilight zone},
    abstract = {MOTIVATION:
    The landscape of structural variation (SV) including complex duplication and translocation patterns is far from resolved. SV detection tools usually exhibit low agreement, are often geared toward certain types or size ranges of variation and struggle to correctly classify the type and exact size of SVs.
    RESULTS:
    We present Gustaf (Generic mUlti-SpliT Alignment Finder), a sound generic multi-split SV detection tool that detects and classifies deletions, inversions, dispersed duplications and translocations of {\ensuremath{\ge}}30 bp. Our approach is based on a generic multi-split alignment strategy that can identify SV breakpoints with base pair resolution. We show that Gustaf correctly identifies SVs, especially in the range from 30 to 100 bp, which we call the next-generation sequencing (NGS) twilight zone of SVs, as well as larger SVs {\ensuremath{>}}500 bp. Gustaf performs better than similar tools in our benchmark and is furthermore able to correctly identify size and location of dispersed duplications and translocations, which otherwise might be wrongly classified, for example, as large deletions. Availability and implementation: Project information, paper benchmark and source code are available via http://www.seqan.de/projects/gustaf/.
    CONTACT: kathrin.trappe@fu-berlin.de.},
    url = {http://publications.imp.fu-berlin.de/1455/}
    }
  • M. Walzer, L. E. Pernas, S. Nasso, W. Bittremieux, S. Nahnsen, P. Kelchtermans, P. Pichler, H. W. P. van den Toorn, A. Staes, J. Vandenbussche, M. Mazanek, T. Taus, R. A. Scheltema, C. D. Kelstrup, L. Gatto, B. van Breukelen, S. Aiche, D. Valkenborg, K. Laukens, K. S. Lilley, J. V. Olsen, A. J. R. Heck, K. Mechtler, R. Aebersold, K. Gevaert, J. A. Vizcaino, H. Hermjakob, O. Kohlbacher, and L. Martens, “qcML: an exchange format for quality control metrics from mass spectrometry experiments,” Molecular & Cellular Proteomics, vol. 13, iss. 8, pp. 1905-1913, 2014.
    [Bibtex]
    @article{fu_mi_publications1449,
    journal = {Molecular \& Cellular Proteomics},
    pages = {1905--1913},
    number = {8},
    author = {M. Walzer and L. E. Pernas and S. Nasso and W. Bittremieux and S. Nahnsen and P. Kelchtermans and P. Pichler and H. W. P. van den Toorn and A. Staes and J. Vandenbussche and M. Mazanek and T. Taus and R. A. Scheltema and C. D. Kelstrup and L. Gatto and B. van Breukelen and S. Aiche and D. Valkenborg and K. Laukens and K. S. Lilley and J. V. Olsen and A. J. R. Heck and K. Mechtler and R. Aebersold and K. Gevaert and J. A. Vizcaino and H. Hermjakob and O. Kohlbacher and L. Martens},
    year = {2014},
    title = {qcML: An Exchange Format for Quality Control Metrics from Mass Spectrometry Experiments},
    volume = {13},
    month = {August},
    url = {http://publications.imp.fu-berlin.de/1449/},
    abstract = {Quality control is increasingly recognized as a crucial aspect of mass spectrometry based proteomics. Several recent papers discuss relevant parameters for quality control and present applications to extract these from the instrumental raw data. What has been missing, however, is a standard data exchange format for reporting these performance metrics. We therefore developed the qcML format, an XML-based standard that follows the design principles of the related mzML, mzIdentML, mzQuantML, and TraML standards from the HUPO-PSI (Proteomics Standards Initiative). In addition to the XML format, we also provide tools for the calculation of a wide range of quality metrics as well as a database format and interconversion tools, so that existing LIMS systems can easily add relational storage of the quality control data to their existing schema. We here describe the qcML specification, along with possible use cases and an illustrative example of the subsequent analysis possibilities. All information about qcML is available at http://code.google.com/p/qcml. Molecular \& Cellular Proteomics 13: 10.1074/mcp.M113.035907, 1905--1913, 2014.}
    }
  • S. Wandelt, D. Deng, S. Gerdjikov, S. Mishra, P. Mitankin, M. Patil, E. Siragusa, A. Tiskin, W. Wang, J. Wang, and U. Leser, “State-of-the-art in string similarity search and join,” SIGMOD Record, vol. 43, iss. 1, 2014.
    [Bibtex]
    @article{fu_mi_publications1401,
    month = {March},
    volume = {43},
    title = {State-of-the-art in String Similarity Search and Join},
    journal = {SIGMOD Record},
    year = {2014},
    author = {S. Wandelt and D. Deng and S. Gerdjikov and S. Mishra and P. Mitankin and M. Patil and E. Siragusa and A. Tiskin and W. Wang and J. Wang and U. Leser},
    number = {1},
    url = {http://publications.imp.fu-berlin.de/1401/}
    }

2013

  • S. Aiche, “Inferring proteolytic processes from mass spectrometry time series data,” PhD Thesis, Berlin, Germany, 2013.
    [Bibtex]
    @phdthesis{fu_mi_publications1445,
    author = {S. Aiche},
    year = {2013},
    address = {Berlin, Germany},
    title = {Inferring Proteolytic Processes from Mass Spectrometry Time Series Data},
    month = {September},
    school = {Freie Universit{\"a}t Berlin},
    url = {http://publications.imp.fu-berlin.de/1445/}
    }
  • S. Andreotti, K. Reinert, and S. Canzar, “The duplication-loss small phylogeny problem: from cherries to trees,” Journal of Computational Biology, vol. 20, iss. 9, pp. 643-59, 2013.
    [Bibtex]
    @article{fu_mi_publications1454,
    title = {The duplication-loss small phylogeny problem: from cherries to trees.},
    volume = {20},
    month = {September},
    number = {9},
    pages = {643--59},
    author = {S. Andreotti and K. Reinert and S. Canzar},
    year = {2013},
    journal = {Journal of Computational Biology},
    abstract = {The reconstruction of the history of evolutionary genome-wide events among a set of related organisms is of great biological interest since it can help to reveal the genomic basis of phenotypes. The sequencing of whole genomes facilitates the study of gene families that vary in size through duplication and loss events, like transfer RNA. However, a high sequence similarity often does not allow one to distinguish between orthologs and paralogs. Previous methods have addressed this difficulty by taking into account flanking regions of members of a family independently. We go one step further by inferring the order of genes of (a set of) families for ancestral genomes by considering the order of these genes on sequenced genomes. We present a novel branch-and-cut algorithm to solve the two species small phylogeny problem in the evolutionary model of duplications and losses. On average, our implementation, DupLoCut, improves the running time of a recently proposed method in the experiments on six Vibrionaceae lineages by a factor of {\textasciitilde}200. Besides the mere improvement in running time, the efficiency of our approach allows us to extend our model from cherries of a species tree, that is, subtrees with two leaves, to the median of three species setting. Being able to determine the median of three species is of key importance to one of the most common approaches to ancestral reconstruction, and our experiments show that its repeated computation considerably reduces the number of duplications and losses along the tree both on simulated instances comprising 128 leaves and a set of Bacillus genomes. Furthermore, in our simulations we show that a reduction in cost goes hand in hand with an improvement of the predicted ancestral genomes. Finally, we prove that the small phylogeny problem in the duplication-loss model is NP-complete already for two species.},
    url = {http://publications.imp.fu-berlin.de/1454/}
    }
  • S. Nahnsen, C. Bielow, K. Reinert, and O. Kohlbacher, “Tools for label-free peptide quantification,” Molecular & Cellular Proteomics, vol. 12, iss. 3, pp. 549-556, 2013.
    [Bibtex]
    @article{fu_mi_publications1473,
    journal = {Molecular \& Cellular Proteomics},
    title = {Tools for Label-free Peptide Quantification},
    volume = {12},
    month = {March},
    number = {3},
    pages = {549--556},
    author = {S. Nahnsen and C. Bielow and K. Reinert and O. Kohlbacher},
    year = {2013},
    abstract = {The increasing scale and complexity of quantitative proteomics studies complicate subsequent analysis of the acquired data. Untargeted label-free quantification, based either on feature intensities or on spectral counting, is a method that scales particularly well with respect to the number of samples. It is thus an excellent alternative to labeling techniques. In order to profit from this scalability, however, data analysis has to cope with large amounts of data, process them automatically, and do a thorough statistical analysis in order to achieve reliable results. We review the state of the art with respect to computational tools for label-free quantification in untargeted proteomics. The two fundamental approaches are feature-based quantification, relying on the summed-up mass spectrometric intensity of peptides, and spectral counting, which relies on the number of MS/MS spectra acquired for a certain protein. We review the current algorithmic approaches underlying some widely used software packages and briefly discuss the statistical strategies for analyzing the data.},
    url = {http://publications.imp.fu-berlin.de/1473/}
    }
  • V. Neu, C. Bielow, I. Gostomski, R. Wintringer, R. Braun, K. Reinert, P. Schneider, H. Stuppner, and C. Huber, “Rapid and comprehensive impurity profiling of synthetic thyroxine by ultrahigh-performance liquid chromatography–high-resolution mass spectrometry,” Analytical Chemistry, vol. 85, iss. 6, pp. 3309-3317, 2013.
    [Bibtex]
    @article{fu_mi_publications1397,
    publisher = {American Chemical Society},
    journal = {Analytical Chemistry},
    month = {February},
    volume = {85},
    title = {Rapid and Comprehensive Impurity Profiling of Synthetic Thyroxine by Ultrahigh-Performance Liquid Chromatography--High-Resolution Mass Spectrometry},
    year = {2013},
    author = {V. Neu and C. Bielow and I. Gostomski and R. Wintringer and R. Braun and K. Reinert and P. Schneider and H. Stuppner and C. Huber},
    pages = {3309--3317},
    number = {6},
    url = {http://publications.imp.fu-berlin.de/1397/},
    abstract = {Rapid and efficient quality control according to the public authority regulations is mandatory to guarantee safety of the pharmaceuticals and to save resources in the pharmaceutical industry. In the case of so-called "grandfather products" like the synthetic thyroid hormone thyroxine, strict regulations enforce a detailed chemical analysis in order to characterize potentially toxic or pharmacologically relevant impurities. We report a straightforward workflow for the comprehensive impurity profiling of synthetic thyroid hormones and impurities employing ultrahigh-performance liquid chromatography (UHPLC) hyphenated to high-resolution mass spectrometry (HRMS). Five different batches of synthetic thyroxine were analyzed resulting in the detection of 71 impurities within 3 min total analysis time. Structural elucidation of the compounds was accomplished via a combination of accurate mass measurements, computer based calculations of molecular formulas, multistage high-resolution mass spectrometry (HRMS(n)), and nuclear magnetic resonance spectroscopy, which enabled the identification of 71 impurities, of which 47 have been unknown so far. Thirty of the latter were structurally elucidated, including products of deiodination, aliphatic chain oxidation, as well as dimeric compounds as new class of thyroid hormone derivatives. Limits of detection for the thyroid compounds were in the 6 ng/mL range for negative electrospray ionization mass spectrometric detection in full scan mode. Within day and day-to-day repeatabilities of retention times and peak areas were below 0.5\% and 3.5\% RSD. The performance characteristics of the method in terms of robustness and information content clearly show that UHPLC-HRMS is adequate for the rapid and reliable detection, identification, and semiquantitative determination of trace levels of impurities in synthetic pharmaceuticals.}
    }
  • V. Neu, C. Bielow, P. Schneider, K. Reinert, H. Stuppner, and C. Huber, “Investigation of reaction mechanisms of drug degradation in the solid state: a kinetic study implementing ultrahigh-performance liquid chromatography and high-resolution mass spectrometry for thermally stressed thyroxine,” Analytical Chemistry, vol. 85, iss. 4, pp. 2385-2390, 2013.
    [Bibtex]
    @article{fu_mi_publications1398,
    year = {2013},
    author = {V. Neu and C. Bielow and P. Schneider and K. Reinert and H. Stuppner and C. Huber},
    number = {4},
    pages = {2385--2390},
    month = {January},
    volume = {85},
    title = {Investigation of Reaction Mechanisms of Drug Degradation in the Solid State: A Kinetic Study Implementing Ultrahigh-Performance Liquid Chromatography and High-Resolution Mass Spectrometry for Thermally Stressed Thyroxine},
    journal = {Analytical Chemistry},
    publisher = {American Chemical Society},
    url = {http://publications.imp.fu-berlin.de/1398/},
    abstract = {A reaction scheme was derived for the thermal degradation of thyroxine in the solid state, using data obtained from ultrahigh-performance liquid chromatography and high-resolution mass spectrometry (UHPLC-HRMS). To study the reaction mechanism and kinetics of the thermal degradation of the pharmaceutical in the solid state, a workflow was developed by generating compound-specific, time-dependent degradation or formation curves of at least 13 different degradation products. Such curves allowed one to distinguish between first- and second-generation degradation products, as well as impurities resulting from chemical synthesis. The structures of the degradation products were derived from accurate molecular masses and multistage mass spectrometry. Deiodination and oxidative side chain degradation were found to be the major degradation reactions, resulting in the formation of deiodinated thyroxines, as well as acetic acid, benzoic acid, formaldehyde, acetamide, hydroxyacetic acid, oxoacetic acid, hydroxyacetamide, or oxoacetamide derivatives of thyroxine or deiodinated thyroxine. Upon additional structural verification of mass spectrometric data using nuclear magnetic resonance spectroscopy, this comprehensive body of data sheds light on an elaborate, radical-driven reaction scheme, explaining the presence or formation of impurities in thermally stressed thyroxine.}
    }
  • E. Siragusa, D. Weese, and K. Reinert, “Fast and accurate read mapping with approximate seeds and multiple backtracking,” Nucleic acids research, vol. 41, iss. 7, p. e78, 2013.
    [Bibtex]
    @article{fu_mi_publications1161,
    title = {Fast and accurate read mapping with approximate seeds and multiple backtracking},
    month = {January},
    volume = {41},
    author = {E. Siragusa and D. Weese and K. Reinert},
    pages = {e78},
    number = {7},
    year = {2013},
    publisher = {Oxford University Press},
    journal = {Nucleic Acids Research},
    url = {http://publications.imp.fu-berlin.de/1161/},
    abstract = {We present Masai, a read mapper representing the state-of-the-art in terms of speed and accuracy. Our tool is an order of magnitude faster than RazerS 3 and mrFAST, 2-4 times faster and more accurate than Bowtie 2 and BWA. The novelties of our read mapper are filtration with approximate seeds and a method for multiple backtracking. Approximate seeds, compared with exact seeds, increase filtration specificity while preserving sensitivity. Multiple backtracking amortizes the cost of searching a large set of seeds by taking advantage of the repetitiveness of next-generation sequencing data. Together, these two methods significantly speed up approximate search on genomic data sets. Masai is implemented in C++ using the SeqAn library. The source code is distributed under the BSD license and binaries for Linux, Mac OS X and Windows can be freely downloaded from http://www.seqan.de/projects/masai.}
    }
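    The filtration idea above rests on the pigeonhole principle: split a read into e + 1 non-overlapping seeds, and any occurrence with at most e errors must contain at least one seed that matches exactly; approximate seeds sharpen this bound by tolerating errors inside longer seeds. A minimal C++ sketch of the exact-seed variant with Hamming-distance verification (names and the linear scan are illustrative, not Masai's code, which searches seeds in an index):

        #include <iostream>
        #include <string>
        #include <vector>

        // Pigeonhole filtration: split the read into (e + 1) seeds so that any
        // occurrence with at most e mismatches contains one exact seed.
        std::vector<std::string> makeSeeds(const std::string& read, int e) {
            std::vector<std::string> seeds;
            int s = e + 1;
            std::size_t len = read.size() / s;
            for (int i = 0; i < s; ++i)
                seeds.push_back(read.substr(i * len, i == s - 1 ? std::string::npos : len));
            return seeds;
        }

        // Verify a candidate location with plain Hamming distance.
        bool verify(const std::string& text, std::size_t pos, const std::string& read, int e) {
            if (pos + read.size() > text.size()) return false;
            int mismatches = 0;
            for (std::size_t i = 0; i < read.size(); ++i)
                if (text[pos + i] != read[i] && ++mismatches > e) return false;
            return true;
        }

        int main() {
            std::string genome = "ACGTACGTTTGACCAGTACGT";
            std::string read = "ACGTTTGACC";
            int e = 1;
            std::vector<std::string> seeds = makeSeeds(read, e);
            std::size_t len = read.size() / seeds.size();
            for (std::size_t i = 0; i < seeds.size(); ++i) {
                std::size_t off = i * len;  // seed offset inside the read
                for (std::size_t p = genome.find(seeds[i]); p != std::string::npos;
                     p = genome.find(seeds[i], p + 1))
                    // A real mapper deduplicates candidates reported by several seeds.
                    if (p >= off && verify(genome, p - off, read, e))
                        std::cout << "match at position " << p - off << "\n";
            }
        }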
  • E. Siragusa, D. Weese, and K. Reinert, “Scalable string similarity search/join with approximate seeds and multiple backtracking,” in Proceedings of the joint edbt/icdt 2013 workshops, New York, NY, USA, 2013, pp. 370-374.
    [Bibtex]
    @inproceedings{fu_mi_publications1225,
    booktitle = {Proceedings of the Joint EDBT/ICDT 2013 Workshops},
    publisher = {ACM},
    pages = {370--374},
    author = {E. Siragusa and D. Weese and K. Reinert},
    address = {New York, NY, USA},
    year = {2013},
    title = {Scalable string similarity search/join with approximate seeds and multiple backtracking},
    series = {EDBT '13},
    url = {http://publications.imp.fu-berlin.de/1225/},
    keywords = {approximate seeds, backtracking, banded Myers bit-vector, banded dynamic programming, filtration, radix tree, suffix tree},
    abstract = {We present in this paper scalable algorithms for optimal string similarity search and join. Our methods are variations of those applied in Masai [15], our recently published tool for mapping high-throughput DNA sequencing data with unprecedented speed and accuracy. The key features of our approach are filtration with approximate seeds and methods for multiple backtracking. Approximate seeds, compared to exact seeds, increase filtration specificity while preserving sensitivity. Multiple backtracking amortizes the cost of searching a large set of seeds. Together, these two methods significantly speed up string similarity search and join operations. Our tool is implemented in C++ and OpenMP using the SeqAn library. The source code is distributed under the BSD license and can be freely downloaded from http://www.seqan.de/projects/edbt2013.}
    }
  • T. Steijger, J. F. Abril, P. G. Engström, F. Kokocinski, The RGASP Consortium, H. Richard, M. H. Schulz, D. Weese, T. Hubbard, R. Guigó, J. Harrow, and P. Bertone, “Assessment of transcript reconstruction methods for rna-seq,” Nature methods, vol. 10, iss. 12, pp. 1177-1184, 2013.
    [Bibtex]
    @article{fu_mi_publications1380,
    month = {November},
    volume = {10},
    title = {Assessment of transcript reconstruction methods for RNA-seq},
    year = {2013},
    author = {T. Steijger and J. F. Abril and P. G. Engstr{\"o}m and F. Kokocinski and The RGASP Consortium and H. Richard and M. H. Schulz and D. Weese and T. Hubbard and R. Guig{\'o} and J. Harrow and P. Bertone},
    pages = {1177--1184},
    number = {12},
    publisher = {Nature Publishing Group},
    journal = {Nature Methods},
    abstract = {We evaluated 25 protocol variants of 14 independent computational methods for exon identification, transcript reconstruction and expression-level quantification from RNA-seq data. Our results show that most algorithms are able to identify discrete transcript components with high success rates but that assembly of complete isoform structures poses a major challenge even when all constituent elements are identified. Expression-level estimates also varied widely across methods, even when based on similar transcript models. Consequently, the complexity of higher eukaryotic genomes imposes severe limitations on transcript recall and splice product discrimination that are likely to remain limiting factors for the analysis of current-generation RNA-seq data.},
    url = {http://publications.imp.fu-berlin.de/1380/}
    }
  • D. Weese, “Indices and applications in high-throughput sequencing,” PhD Thesis, 2013.
    [Bibtex]
    @phdthesis{fu_mi_publications1288,
    author = {D. Weese},
    year = {2013},
    title = {Indices and Applications in High-Throughput Sequencing},
    school = {Freie Universit{\"a}t Berlin},
    month = {June},
    abstract = {Recent advances in sequencing technology allow the production of billions of base pairs per day in the form of reads of length 100 bp and longer, and current developments promise the personal \$1,000 genome within a couple of years. The analysis of these unprecedented amounts of data demands efficient data structures and algorithms. One such data structure is the substring index, which represents all substrings, or all substrings up to a certain length, contained in a given text.
    In this thesis we propose three substring indices, which we extend to be applicable to millions of sequences. We devise internal and external memory construction algorithms and a uniform framework for accessing the generalized suffix tree. Additionally, we propose several index-based applications, e.g. exact and approximate pattern matching and various repeat search algorithms.
    Second, we present the read mapping tool RazerS, which aligns millions of single or paired-end reads of arbitrary lengths to their potential genomic origin using either Hamming or edit distance. Our tool can work either lossless or with a user-defined loss rate at higher speeds. Given the loss rate, we present a novel approach that guarantees not to lose more reads than specified. This enables the user to adapt to the problem at hand and provides a seamless tradeoff between sensitivity and running time. We compare RazerS with other state-of-the-art read mappers and show that it has the highest sensitivity and a comparable performance on various real-world datasets.
    Finally, we propose a general approach for frequency-based string mining, which has many applications, e.g. in contrast data mining. Our contribution is a novel and lightweight algorithm that is faster and uses less memory than the best available algorithms. We show its applicability for mining multiple databases with a variety of frequency constraints. To this end, we use the notion of entropy from information theory to generalize the emerging substring mining problem to multiple databases. To demonstrate the improvement of our algorithm we compare it to recent approaches in real-world experiments on various string domains, e.g. natural language, DNA, or protein sequences.},
    keywords = {HTS; full-text index; frequency string mining; read mapping; SeqAn},
    url = {http://publications.imp.fu-berlin.de/1288/}
    }
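    In its simplest incarnation, a substring index as discussed above can be illustrated by a suffix array: all suffix start positions of a text sorted lexicographically, with pattern lookup by binary search. A toy C++ sketch (naive quadratic construction; the thesis devises far more efficient internal and external memory construction algorithms):

        #include <algorithm>
        #include <iostream>
        #include <numeric>
        #include <string>
        #include <vector>

        // Naive suffix array construction: sort suffix start positions by suffix.
        std::vector<int> buildSuffixArray(const std::string& text) {
            std::vector<int> sa(text.size());
            std::iota(sa.begin(), sa.end(), 0);
            std::sort(sa.begin(), sa.end(), [&](int a, int b) {
                return text.compare(a, std::string::npos, text, b, std::string::npos) < 0;
            });
            return sa;
        }

        // All occurrences of a pattern lie in one contiguous suffix array interval.
        std::vector<int> locate(const std::string& text, const std::vector<int>& sa,
                                const std::string& pat) {
            auto it = std::lower_bound(sa.begin(), sa.end(), pat,
                [&](int pos, const std::string& p) { return text.compare(pos, p.size(), p) < 0; });
            std::vector<int> hits;
            for (; it != sa.end() && text.compare(*it, pat.size(), pat) == 0; ++it)
                hits.push_back(*it);
            return hits;
        }

        int main() {
            std::string text = "mississippi";
            std::vector<int> sa = buildSuffixArray(text);
            for (int p : locate(text, sa, "ssi"))
                std::cout << "occurrence at " << p << "\n";  // prints 5 and 2
        }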
  • A. Zerck, E. Nordhoff, H. Lehrach, and K. Reinert, “Optimal precursor ion selection for lc-maldi ms/ms,” Bmc bioinformatics, vol. 14, iss. 1, p. 56, 2013.
    [Bibtex]
    @article{fu_mi_publications1400,
    publisher = {BioMed Central},
    journal = {BMC Bioinformatics},
    year = {2013},
    number = {1},
    pages = {56},
    author = {A. Zerck and E. Nordhoff and H. Lehrach and K. Reinert},
    volume = {14},
    month = {February},
    title = {Optimal precursor ion selection for LC-MALDI MS/MS},
    url = {http://publications.imp.fu-berlin.de/1400/},
    abstract = {Background
    Liquid chromatography mass spectrometry (LC-MS) maps in shotgun proteomics are often too complex to select every detected peptide signal for fragmentation by tandem mass spectrometry (MS/MS). Standard methods for precursor ion selection, commonly based on data-dependent acquisition, select highly abundant peptide signals in each spectrum. However, these approaches produce redundant information and are biased towards high-abundance proteins.
    Results
    We present two algorithms for inclusion list creation that formulate precursor ion selection as an optimization problem. Given an LC-MS map, the first approach maximizes the number of selected precursors given constraints such as a limited number of acquisitions per RT fraction. Second, we introduce a protein sequence-based inclusion list that can be used to monitor proteins of interest. Given only the protein sequences, we create an inclusion list that optimally covers the whole protein set. Additionally, we propose an iterative precursor ion selection that aims at reducing the redundancy obtained with data-dependent LC-MS/MS. We overcome the risk of erroneous assignments by including methods for retention time and proteotypicity predictions. We show that our method identifies a set of proteins requiring fewer precursors than standard approaches. Thus, it is well suited for precursor ion selection in experiments with limited sample amount or analysis time.
    Conclusions
    We present three approaches to precursor ion selection with LC-MALDI MS/MS. Using a well-defined protein standard and a complex human cell lysate, we demonstrate that our methods outperform standard approaches. Our algorithms are implemented as part of OpenMS and are available under http://www.openms.de.
    }
    }
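    The first optimization above (maximize the number of selected precursors subject to a limited number of acquisitions per RT fraction) can be approximated by a simple greedy rule. The sketch below is a stand-in for intuition only, not the ILP solved in the paper, and all names and values are invented:

        #include <algorithm>
        #include <iostream>
        #include <map>
        #include <vector>

        // A detected LC-MS feature: retention-time fraction plus a ranking score
        // (e.g., signal intensity). Names are illustrative, not from OpenMS.
        struct Feature { int rtFraction; double score; };

        // Greedy stand-in for the ILP: visit features by decreasing score and
        // accept one as a precursor while its RT fraction has free acquisition
        // slots. The paper instead solves the selection problem exactly.
        std::vector<Feature> selectPrecursors(std::vector<Feature> feats, int capPerFraction) {
            std::sort(feats.begin(), feats.end(),
                      [](const Feature& a, const Feature& b) { return a.score > b.score; });
            std::map<int, int> slotsUsed;
            std::vector<Feature> selected;
            for (const Feature& f : feats)
                if (slotsUsed[f.rtFraction]++ < capPerFraction)
                    selected.push_back(f);
            return selected;
        }

        int main() {
            std::vector<Feature> feats = {{0, 9.1}, {0, 3.2}, {0, 7.5}, {1, 4.4}, {1, 2.0}};
            for (const Feature& f : selectPrecursors(feats, 2))  // 2 acquisitions per fraction
                std::cout << "fraction " << f.rtFraction << ", score " << f.score << "\n";
        }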
  • L. de la Garza, J. Krüger, C. Schärfe, M. Röttig, S. Aiche, K. Reinert, and O. Kohlbacher, “From the desktop to the grid: conversion of knime workflows to guse,” in Iwsg (international workshop on science gateways), 2013.
    [Bibtex]
    @inproceedings{fu_mi_publications1441,
    title = {From the Desktop to the Grid: conversion of KNIME Workflows to gUSE},
    author = {L. de la Garza and J. Kr{\"u}ger and Ch. Sch{\"a}rfe and M. R{\"o}ttig and S. Aiche and K. Reinert and O. Kohlbacher},
    booktitle = {IWSG (International Workshop on Science Gateways)},
    year = {2013},
    url = {http://publications.imp.fu-berlin.de/1441/},
    abstract = {The Konstanz Information Miner is a user-friendly graphical workflow designer with a broad user base in industry and academia. Its broad range of embedded tools and its powerful data mining and visualization capabilities render it ideal for scientific workflows. It is thus used more and more in a broad range of applications. However, the free version typically runs on a desktop computer, restricting users who want to tap into more computing power. The grid and cloud User Support Environment is a free and open source project created for parallelized and distributed systems, but the creation of workflows with the included components has a steeper learning curve.
    In this work we suggest an easy-to-implement solution combining the ease of use of the Konstanz Information Miner with the computational power of distributed computing infrastructures. We present a solution permitting the conversion of workflows between the two platforms. This enables convenient development, debugging, and maintenance of scientific workflows on the desktop. These workflows can then be deployed on a cloud or grid, thus permitting large-scale computation.
    To achieve our goals, we relied on a Common Tool Description XML file format which describes the execution of arbitrary programs in a structured and easily readable and parseable way. In order to integrate external programs into KNIME, we employed the Generic KNIME Nodes extension.}
    }

2012

  • S. Aiche, K. Reinert, C. Schütte, D. Hildebrand, H. Schlüter, and T. O. F. Conrad, “Inferring proteolytic processes from mass spectrometry time series data using degradation graphs,” Plos one, vol. 7, iss. 7, p. e40656, 2012.
    [Bibtex]
    @article{fu_mi_publications1143,
    publisher = {Public Library of Science},
    journal = {PLoS ONE},
    pages = {e40656},
    number = {7},
    author = {S. Aiche and K. Reinert and Ch. Sch{\"u}tte and D. Hildebrand and H. Schl{\"u}ter and T. O. F. Conrad},
    year = {2012},
    title = {Inferring Proteolytic Processes from Mass Spectrometry Time Series Data Using Degradation Graphs},
    volume = {7},
    month = {July},
    url = {http://publications.imp.fu-berlin.de/1143/},
    abstract = {Background: Proteases play an essential part in a variety of biological processes. Besides their importance under healthy conditions they are also known to have a crucial role in complex diseases like cancer. In recent years it was shown that not only the fragments produced by proteases but also their dynamics, especially ex vivo, can serve as biomarkers. So far, however, only few approaches have been taken to explicitly model the dynamics of proteolysis in the context of mass spectrometry.
    Results: We introduce a new concept to model proteolytic processes: the degradation graph. The degradation graph is an extension of the cleavage graph, a data structure to reconstruct and visualize the proteolytic process. In contrast to previous approaches we extended the model to incorporate endoproteolytic processes and present a method to construct a degradation graph from mass spectrometry time-series data. Based on a degradation graph and the intensities extracted from the mass spectra it is possible to estimate reaction rates of the underlying processes. We further suggest a score to rate different degradation graphs in their ability to explain the observed data. This score is used in an iterative heuristic to improve the structure of the initially constructed degradation graph.
    Conclusion: We show that the proposed method is able to recover all degraded and generated peptides, the underlying reactions, and the reaction rates of proteolytic processes based on mass spectrometry time-series data. We use simulated and real data to demonstrate that a given process can be reconstructed even in the presence of extensive noise, isobaric signals and false identifications. While the model is currently only validated on peptide data, it is also applicable to proteins, as long as the necessary time-series data can be produced.}
    }
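    To make the degradation graph concrete: peptides are nodes, proteolytic reactions are edges, and each cleavage edge drains its substrate while feeding its products. Under a simple mass-action assumption, an edge with rate constant k contributes d[S]/dt = -k[S] and d[P]/dt = +k[S]; the sketch below integrates a two-generation toy graph with a forward Euler step (topology and rates invented for illustration, not the estimation procedure of the paper):

        #include <iostream>
        #include <vector>

        // One proteolytic reaction: substrate -> two products, rate constant k.
        struct Reaction { int substrate, productA, productB; double k; };

        // One Euler step of mass-action kinetics on the degradation graph.
        void eulerStep(std::vector<double>& conc, const std::vector<Reaction>& rxns, double dt) {
            std::vector<double> d(conc.size(), 0.0);
            for (const Reaction& r : rxns) {
                double flux = r.k * conc[r.substrate];
                d[r.substrate] -= flux;   // substrate is degraded ...
                d[r.productA]  += flux;   // ... and both fragments are generated
                d[r.productB]  += flux;
            }
            for (std::size_t i = 0; i < conc.size(); ++i)
                conc[i] += dt * d[i];
        }

        int main() {
            // Nodes: 0 = parent peptide, 1..4 = fragments (two generations).
            std::vector<Reaction> rxns = {{0, 1, 2, 0.30}, {1, 3, 4, 0.10}};
            std::vector<double> conc = {1.0, 0.0, 0.0, 0.0, 0.0};
            for (int t = 0; t < 10; ++t) eulerStep(conc, rxns, 0.5);
            for (std::size_t i = 0; i < conc.size(); ++i)
                std::cout << "peptide " << i << ": " << conc[i] << "\n";
        }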
  • S. Andreotti, G. W. Klau, and K. Reinert, “Antilope – a lagrangian relaxation approach to the de novo peptide sequencing problem,” Ieee/acm transactions on computational biology and bioinformatics (tcbb), vol. 9, iss. 2, pp. 385-394, 2012.
    [Bibtex]
    @article{fu_mi_publications1047,
    pages = {385--394},
    number = {2},
    author = {S. Andreotti and G. W. Klau and K. Reinert},
    year = {2012},
    address = {Los Alamitos, CA, USA},
    title = {Antilope - A Lagrangian Relaxation Approach to the de novo Peptide Sequencing Problem},
    volume = {9},
    month = {March},
    publisher = {IEEE Computer Society Press },
    note = {doi: 10.1109/TCBB.2011.59},
    journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB) },
    abstract = {Peptide sequencing from mass spectrometry data is a key step in proteome research. Especially de novo sequencing, the identification of a peptide from its spectrum alone, is still a challenge even for state-of-the-art algorithmic approaches. In this paper we present ANTILOPE, a new fast and flexible approach based on mathematical programming. It builds on the spectrum graph model and works with a variety of scoring schemes. ANTILOPE combines Lagrangian relaxation for solving an integer linear programming formulation with an adaptation of Yen's k shortest paths algorithm. It shows a significant improvement in running time compared to mixed integer optimization and performs at the same speed as other state-of-the-art tools. We also implemented a generic probabilistic scoring scheme that can be trained automatically for a dataset of annotated spectra and is independent of the mass spectrometer type. Evaluations on benchmark data show that ANTILOPE is competitive with the popular state-of-the-art programs PepNovo and NovoHMM both in terms of run time and accuracy. Furthermore, it offers increased flexibility in the number of considered ion types. ANTILOPE will be freely available as part of the open source proteomics library OpenMS.},
    url = {http://publications.imp.fu-berlin.de/1047/}
    }
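    The spectrum graph model mentioned above can be pictured as a DAG whose nodes are putative prefix masses and whose edges, labelled with amino acids, connect masses differing by a residue mass; a best-scoring source-to-sink path spells a peptide candidate. A toy dynamic program over such a graph (scores and topology invented; the paper instead solves an ILP via Lagrangian relaxation and enumerates suboptimal paths):

        #include <iostream>
        #include <string>
        #include <vector>

        // Edge in a spectrum graph: from one prefix-mass node to another,
        // labelled with an amino acid and a (made-up) log-odds score.
        struct Edge { int from, to; char aa; double score; };

        int main() {
            // Nodes 0..4 ordered by increasing prefix mass; node 4 is the full mass.
            std::vector<Edge> edges = {
                {0, 1, 'G', 1.2}, {0, 2, 'A', 0.4}, {1, 3, 'S', 0.9},
                {2, 3, 'G', 0.7}, {3, 4, 'K', 1.5},
            };
            int n = 5;
            std::vector<double> best(n, -1e18);
            std::vector<int> bestEdge(n, -1);
            best[0] = 0.0;
            // Nodes are topologically ordered by mass and the edge list is sorted
            // by source node, so one left-to-right relaxation sweep suffices.
            for (const Edge& e : edges)
                if (best[e.from] + e.score > best[e.to]) {
                    best[e.to] = best[e.from] + e.score;
                    bestEdge[e.to] = &e - &edges[0];
                }
            // Backtrack the best path from the full-mass node to spell the peptide.
            std::string peptide;
            for (int v = n - 1; bestEdge[v] != -1; v = edges[bestEdge[v]].from)
                peptide.insert(peptide.begin(), edges[bestEdge[v]].aa);
            std::cout << "best candidate: " << peptide << " (score " << best[n - 1] << ")\n";
        }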
  • C. Bielow, “Quantification and simulation of liquid chromatography-mass spectrometry data,” PhD Thesis, Berlin, Germany, 2012.
    [Bibtex]
    @phdthesis{fu_mi_publications1444,
    title = {Quantification and Simulation of Liquid Chromatography-Mass Spectrometry Data},
    school = {Freie Universit{\"a}t Berlin},
    month = {October},
    author = {C. Bielow},
    address = {Berlin, Germany},
    year = {2012},
    url = {http://publications.imp.fu-berlin.de/1444/},
    abstract = {Computational mass spectrometry is a fast evolving field that has attracted increased attention over the last couple of years.
    The performance of software solutions determines the success of analysis to a great extent. New algorithms are required to reflect new experimental procedures and deal with new instrument generations.
    One essential component of algorithm development is the validation (as well as comparison) of software on a broad range of
    data sets. This requires a gold standard (or so-called ground truth), which is usually obtained by manual annotation of a real data set.
    Comprehensive manually annotated public data sets for mass spectrometry data are labor-intensive to produce and their quality strongly depends on
    the skill of the human expert. Some parts of the data may even be impossible to annotate due to high levels of noise or other ambiguities.
    Furthermore, manually annotated data is usually not available for all steps in a typical computational analysis pipeline.
    We thus developed the most comprehensive simulation software to date, which allows the user to generate multiple levels of ground truth
    and features a plethora of settings to reflect experimental conditions and instrument settings.
    The simulator is used to generate several distinct types of data. The data are subsequently employed to evaluate existing algorithms.
    Additionally, we employ simulation to determine the influence of instrument attributes and sample complexity on the ability of
    algorithms to recover information. The results give valuable hints on how to optimize experimental setups.
    Furthermore, this thesis introduces two quantitative approaches, namely a decharging algorithm based on integer linear programming
    and a new workflow for identification of differentially expressed proteins for a large in vitro study on toxic compounds.
    Decharging infers the uncharged mass of a peptide (or protein) by clustering all its charge variants. The latter occur frequently under certain experimental conditions.
    We employ simulation to show that decharging is robust against missing values even for high complexity data and that the algorithm outperforms
    other solutions in terms of mass accuracy and run time on real data.
    The last part of this thesis deals with a new state-of-the-art workflow for protein quantification based on isobaric tags for relative and absolute quantitation (iTRAQ). We devise
    a new approach to isotope correction, propose an experimental design, introduce new metrics of iTRAQ data quality, and
    confirm putative properties of iTRAQ data using a novel approach.
    All tools developed as part of this thesis are implemented in OpenMS, a C++ library for computational mass spectrometry.}
    }
  • A. -K. Emde, M. H. Schulz, D. Weese, R. Sun, M. Vingron, V. M. Kalscheuer, S. A. Haas, and K. Reinert, “Detecting genomic indel variants with exact breakpoints in single- and paired-end sequencing data using splazers,” Bioinformatics, vol. 28, iss. 5, pp. 619-627, 2012.
    [Bibtex]
    @article{fu_mi_publications1160,
    author = {A.-K. Emde and M. H. Schulz and D. Weese and R. Sun and M. Vingron and V. M. Kalscheuer and S. A. Haas and K. Reinert},
    pages = {619--627},
    number = {5},
    year = {2012},
    title = {Detecting genomic indel variants with exact breakpoints in single- and paired-end sequencing data using SplazerS},
    month = {January},
    volume = {28},
    publisher = {Oxford University Press},
    journal = {Bioinformatics},
    abstract = {Motivation: The reliable detection of genomic variation in resequencing data is still a major challenge, especially for variants larger than a few base pairs. Sequencing reads crossing boundaries of structural variation carry the potential for their identification, but are difficult to map.
    Results: Here we present a method for 'split' read mapping, where prefix and suffix match of a read may be interrupted by a longer gap in the read-to-reference alignment. We use this method to accurately detect medium-sized insertions and long deletions with precise breakpoints in genomic resequencing data. Compared with alternative split mapping methods, SplazerS significantly improves sensitivity for detecting large indel events, especially in variant-rich regions. Our method is robust in the presence of sequencing errors as well as alignment errors due to genomic mutations/divergence, and can be used on reads of variable lengths. Our analysis shows that SplazerS is a versatile tool applicable to unanchored or single-end as well as anchored paired-end reads. In addition, application of SplazerS to targeted resequencing data led to the interesting discovery of a complete, possibly functional gene retrocopy variant.
    Availability: SplazerS is available from http://www.seqan.de/projects/splazers.},
    url = {http://publications.imp.fu-berlin.de/1160/}
    }
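    The split-mapping idea above anchors a read by a prefix match and a suffix match whose reference positions are separated by a gap (a deletion; for insertions the two parts overlap on the read instead). A brute-force toy that scans all split points, assuming exact part matches for brevity (SplazerS itself tolerates errors in both parts and scores the split alignment):

        #include <iostream>
        #include <string>

        int main() {
            // A 3 bp deletion ("TTT") in the read relative to the reference.
            std::string ref  = "AAACCCGGGTTTAAACCC";
            std::string read = "CCCGGGAAACCC";  // prefix CCCGGG + suffix AAACCC
            std::size_t minPart = 4;  // minimal anchor length for each read part

            // Try every split of the read into prefix and suffix; look for the
            // prefix in the reference and the suffix downstream of it.
            for (std::size_t s = minPart; s + minPart <= read.size(); ++s) {
                std::string pre = read.substr(0, s), suf = read.substr(s);
                for (std::size_t p = ref.find(pre); p != std::string::npos;
                     p = ref.find(pre, p + 1)) {
                    std::size_t q = ref.find(suf, p + pre.size());
                    if (q != std::string::npos)
                        std::cout << "split at read pos " << s << ": prefix @" << p
                                  << ", suffix @" << q << ", deletion of "
                                  << q - (p + pre.size()) << " bp\n";
                }
            }
        }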
  • J. Junker, C. Bielow, A. Bertsch, M. Sturm, K. Reinert, and O. Kohlbacher, “Toppas: a graphical workflow editor for the analysis of high-throughput proteomics data,” J. proteome res., vol. 11, iss. 7, pp. 3914-3920, 2012.
    [Bibtex]
    @article{fu_mi_publications1396,
    title = {TOPPAS: a graphical workflow editor for the analysis of high-throughput proteomics data},
    month = {July},
    volume = {11},
    author = {J. Junker and C. Bielow and A. Bertsch and M. Sturm and K. Reinert and O. Kohlbacher},
    pages = {3914--3920},
    number = {7},
    year = {2012},
    journal = {J. Proteome Res.},
    publisher = {ACS Publications},
    abstract = {Mass spectrometry coupled to high-performance liquid chromatography (HPLC-MS) is evolving more quickly than ever. A wide range of different instrument types and experimental setups are commonly used. Modern instruments acquire huge amounts of data, thus requiring tools for an efficient and automated data analysis. Most existing software for analyzing HPLC-MS data is monolithic and tailored toward a specific application. A more flexible alternative consists of pipeline-based tool kits allowing the construction of custom analysis workflows from small building blocks, e.g., the Trans Proteomics Pipeline (TPP) or The OpenMS Proteomics Pipeline (TOPP). One drawback, however, is the hurdle of setting up complex workflows using command line tools. We present TOPPAS, The OpenMS Proteomics Pipeline ASsistant, a graphical user interface (GUI) for rapid composition of HPLC-MS analysis workflows. Workflow construction reduces to simple drag-and-drop of analysis tools and adding connections in between. Integration of external tools into these workflows is possible as well. Once workflows have been developed, they can be deployed in other workflow management systems or batch processing systems in a fully automated fashion. The implementation is portable and has been tested under Windows, Mac OS X, and Linux. TOPPAS is open-source software and available free of charge at http://www.OpenMS.de/TOPPAS.},
    url = {http://publications.imp.fu-berlin.de/1396/}
    }
  • B. Kehr, K. Reinert, and A. E. Darling, “Hidden breakpoints in genome alignments,” in Lecture notes in computer science: algorithms in bioinformatics, B. Raphael and J. Tang, Eds., Berlin: Springer-Verlag, 2012, vol. 7534, pp. 391-403.
    [Bibtex]
    @incollection{fu_mi_publications1168,
    editor = {B. Raphael and J. Tang},
    publisher = {Springer-Verlag},
    booktitle = {Lecture Notes in Computer Science: Algorithms in Bioinformatics},
    volume = {7534},
    title = {Hidden Breakpoints in Genome Alignments },
    year = {2012},
    address = {Berlin},
    author = {B. Kehr and K. Reinert and A. E. Darling},
    pages = {391--403},
    abstract = {During the course of evolution, an organism's genome can undergo changes that affect the large-scale structure of the genome. These changes include gene gain, loss, duplication, chromosome fusion, fission, and rearrangement. When gene gain and loss occurs in addition to other types of rearrangement, breakpoints of rearrangement can exist that are only detectable by comparison of three or more genomes. An arbitrarily large number of these 'hidden' breakpoints can exist among genomes that exhibit no rearrangements in pairwise comparisons.
    We present an extension of the multichromosomal breakpoint median problem to genomes that have undergone gene gain and loss. We then demonstrate that the median distance among three genomes can be used to calculate a lower bound on the number of hidden breakpoints present. We provide an implementation of this calculation including the median distance, along with some practical improvements on the time complexity of the underlying algorithm.
    We apply our approach to measure the abundance of hidden breakpoints in simulated data sets under a wide range of evolutionary scenarios. We demonstrate that in simulations the hidden breakpoint counts depend strongly on relative rates of inversion and gene gain/loss. Finally we apply current multiple genome aligners to the simulated genomes, and show that all aligners introduce a high degree of error in hidden breakpoint counts, and that this error grows with evolutionary distance in the simulation. Our results suggest that hidden breakpoint error may be pervasive in genome alignments. },
    url = {http://publications.imp.fu-berlin.de/1168/}
    }
  • D. Weese, M. Holtgrewe, and K. Reinert, “Razers 3: faster, fully sensitive read mapping,” Bioinformatics, vol. 28, iss. 20, pp. 2592-2599, 2012.
    [Bibtex]
    @article{fu_mi_publications1159,
    title = {RazerS 3: Faster, fully sensitive read mapping},
    volume = {28},
    month = {August},
    pages = {2592--2599},
    number = {20},
    author = {D. Weese and M. Holtgrewe and K. Reinert},
    year = {2012},
    journal = {Bioinformatics},
    publisher = {Oxford University Press},
    abstract = {Motivation: Over the last years, NGS sequencing has become a key technology for many applications in the biomedical sciences. Throughput continues to increase and new protocols provide longer reads than previously available. In almost all applications, read mapping is a first step. Hence, it is crucial to have algorithms and implementations that are fast, highly sensitive, and able to deal with long reads and a large absolute number of indels.
    Results: RazerS is a read mapping program with adjustable sensitivity based on counting q-grams. In this work we propose the successor RazerS 3, which now supports shared-memory parallelism, an additional seed-based filter with adjustable sensitivity, a much faster, banded version of Myers' bit-vector algorithm for verification, memory-saving measures and support for the SAM output format. This leads to a much improved performance for mapping reads, in particular long reads with many errors. We extensively compare RazerS 3 with other popular read mappers and show that its results are often superior to them in terms of sensitivity while exhibiting practical and often competitive run times. In addition, RazerS 3 works without a precomputed index.
    Availability and Implementation: Source code and binaries are freely available for download at http://www.seqan.de/projects/razers. RazerS 3 is implemented in C++ and OpenMP under a GPL license using the SeqAn library and supports Linux, Mac OS X, and Windows.},
    url = {http://publications.imp.fu-berlin.de/1159/}
    }
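    The q-gram counting behind RazerS rests on the q-gram lemma: a length-n read occurring with at most e errors shares at least n + 1 - (e + 1)q of its overlapping q-grams with the matching window. A small sketch computing this threshold together with a (set-based, hence slightly loose) shared-q-gram count; names and sequences are illustrative:

        #include <iostream>
        #include <set>
        #include <string>

        // q-gram lemma threshold: an occurrence of a length-n pattern with at
        // most e errors shares at least n + 1 - (e + 1) * q q-grams with it.
        int qgramThreshold(int n, int e, int q) { return n + 1 - (e + 1) * q; }

        // Count q-grams of the read that also occur in the window (set semantics;
        // counting with multiplicities gives a tighter filter but longer code).
        int sharedQGrams(const std::string& read, const std::string& window, int q) {
            std::set<std::string> winGrams;
            for (std::size_t i = 0; i + q <= window.size(); ++i)
                winGrams.insert(window.substr(i, q));
            int shared = 0;
            for (std::size_t i = 0; i + q <= read.size(); ++i)
                shared += winGrams.count(read.substr(i, q));
            return shared;
        }

        int main() {
            std::string read   = "ACGTACGTACGT";
            std::string window = "ACGTACCTACGT";  // one mismatch vs. the read
            int q = 3, e = 1;
            int t = qgramThreshold((int)read.size(), e, q);
            // A window becomes a verification candidate only if shared >= threshold.
            std::cout << "threshold: " << t << ", shared: "
                      << sharedQGrams(read, window, q) << "\n";
        }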

2011

  • C. Bielow, C. Gröpl, O. Kohlbacher, and K. Reinert, “Bioinformatics for qualitative and quantitative proteomics.,” in Bioinformatics for omics data methods and protocols, Humana Press, 2011, vol. 719, pp. 331-49.
    [Bibtex]
    @incollection{fu_mi_publications1067,
    month = {January},
    volume = {719},
    title = {Bioinformatics for qualitative and quantitative proteomics.},
    year = {2011},
    author = {C. Bielow and C. Gr{\"o}pl and O. Kohlbacher and K. Reinert},
    pages = {331--49},
    publisher = {Humana Press},
    journal = {Methods in molecular biology (Clifton, N.J.)},
    booktitle = {Bioinformatics for Omics Data Methods and Protocols},
    url = {http://publications.imp.fu-berlin.de/1067/},
    abstract = {Mass spectrometry is today a key analytical technique to elucidate the amount and content of proteins expressed in a certain cellular context. The degree of automation in proteomics has yet to reach that of genomic techniques, but even current technologies make a manual inspection of the data infeasible. This article addresses the key algorithmic problems bioinformaticians face when handling modern proteomic samples and shows common solutions to them. We provide examples of how algorithms can be combined to build relatively complex analysis pipelines, point out certain pitfalls and aspects worth considering, and give a list of current state-of-the-art tools.}
    }
  • C. Bielow, S. Aiche, S. Andreotti, and K. Reinert, “Mssimulator: simulation of mass spectrometry data,” Journal of proteome research, vol. 10, iss. 7, pp. 2922-2929, 2011.
    [Bibtex]
    @article{fu_mi_publications1066,
    journal = {Journal of Proteome Research},
    author = {C. Bielow and S. Aiche and S. Andreotti and K. Reinert},
    pages = {2922--2929},
    number = {7},
    year = {2011},
    title = {MSSimulator: Simulation of Mass Spectrometry Data},
    month = {July},
    volume = {10},
    url = {http://publications.imp.fu-berlin.de/1066/},
    abstract = {Mass spectrometry coupled to liquid chromatography (LC-MS and LC-MS/MS) is commonly used to analyze the protein content of biological samples in large scale studies, enabling quantitation and identification of proteins and peptides using a wide range of experimental protocols, algorithms, and statistical models to analyze the data. Currently it is difficult to compare the plethora of algorithms for these tasks. So far, curated benchmark data exists for peptide identification algorithms but data that represents a ground truth for the evaluation of LC-MS data is limited. Hence there have been attempts to simulate such data in a controlled fashion to evaluate and compare algorithms. We present MSSimulator, a simulation software for LC-MS and LC-MS/MS experiments. Starting from a list of proteins from a FASTA file,the simulation will perform in-silico digestion, retention time prediction, ionization filtering, and raw signal simulation (including MS/MS), while providing many options to change the properties of the resultingdata like elution profile shape, resolution and sampling rate. Several protocols for SILAC, iTRAQ or MS(E) are available, in addition to the usual label-free approach, making MSSimulator the most comprehensive simulator for LC-MS and LC-MS/MS data.}
    }
  • S. Böcker, B. Kehr, and F. Rasche, “Determination of glycan structure from tandem mass spectra,” Ieee/acm transactions on computational biology and bioinformatics, vol. 8, iss. 4, pp. 976-986, 2011.
    [Bibtex]
    @article{fu_mi_publications1132,
    journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics },
    publisher = {IEEE computer society, TCBB},
    volume = {8},
    month = {July},
    title = {Determination of Glycan Structure from Tandem Mass Spectra},
    year = {2011},
    pages = {976--986},
    number = {4},
    author = {S. B{\"o}cker and B. Kehr and F. Rasche},
    abstract = {Glycans are molecules made from simple sugars that form complex tree structures. Glycans constitute one of the most important protein modifications and identification of glycans remains a pressing problem in biology. Unfortunately, the structure of glycans is hard to predict from the genome sequence of an organism. In this paper, we consider the problem of deriving the topology of a glycan solely from tandem mass spectrometry (MS) data. We study how to generate glycan tree candidates that sufficiently match the sample mass spectrum, avoiding the combinatorial explosion of glycan structures. Unfortunately, the resulting problem is known to be computationally hard. We present an efficient exact algorithm for this problem based on fixed-parameter algorithmics that can process a spectrum in a matter of seconds. We also report some preliminary results of our method on experimental data, combining it with a preliminary candidate evaluation scheme. We show that our approach is fast in applications, and that we achieve very good de novo identification results. Finally, we show how to count the number of glycan topologies for a fixed size or a fixed mass. We generalize this result to count the number of (labeled) trees with bounded out-degree, improving on results obtained using P{\'o}lya's enumeration theorem.},
    url = {http://publications.imp.fu-berlin.de/1132/}
    }
  • M. Holtgrewe, A. -K. Emde, D. Weese, and K. Reinert, “A novel and well-defined benchmarking method for second generation read mapping,” Bmc bioinformatics, vol. 12, iss. 120, 2011.
    [Bibtex]
    @article{fu_mi_publications1072,
    publisher = {BioMed Central},
    journal = {BMC Bioinformatics},
    author = {M. Holtgrewe and A.-K. Emde and D. Weese and K. Reinert},
    number = {120},
    year = {2011},
    title = {A Novel And Well-Defined Benchmarking Method For Second Generation Read Mapping},
    month = {May},
    volume = {12},
    url = {http://publications.imp.fu-berlin.de/1072/},
    abstract = {Background
    Second generation sequencing technologies yield DNA sequence data at ultra high-throughput. Common to most biological applications is a mapping of the reads to an almost identical or highly similar reference genome. The assessment of the quality of read mapping results is not straightforward and has not been formalized so far. Hence, it has not been easy to compare different read mapping approaches in a unified way and to determine which program is the best for what task.
    Results
    We present a new benchmark method, called Rabema (Read Alignment BEnchMArk), for read mappers. It consists of a strict definition of the read mapping problem and of tools to evaluate the result of arbitrary read mappers supporting the SAM output format.
    Conclusions
    We show the usefulness of the benchmark program by performing a comparison of popular read mappers. The tools supporting the benchmark are licensed under the GPL and available from http://www.seqan.de/projects/rabema.html.}
    }
  • B. Kehr, D. Weese, and K. Reinert, “Stellar: fast and exact local alignments,” Bmc bioinformatics, vol. 12, iss. S9, p. S15, 2011.
    [Bibtex]
    @article{fu_mi_publications1092,
    title = {STELLAR: fast and exact local alignments},
    volume = {12},
    month = {October},
    number = {S9},
    pages = {S15},
    author = {B. Kehr and D. Weese and K. Reinert},
    year = {2011},
    publisher = {BioMed Central},
    journal = {BMC Bioinformatics},
    abstract = {Background
    Large-scale comparison of genomic sequences requires reliable tools for the search of local alignments. Practical local aligners are in general fast, but heuristic, and hence sometimes miss significant matches.
    Results
    We present here the local pairwise aligner STELLAR that has full sensitivity for {\ensuremath{\epsilon}}-alignments, i.e. guarantees to report all local alignments of a given minimal length and maximal error rate. The aligner is composed of two steps, filtering and verification. We apply the SWIFT algorithm for lossless filtering, and have developed a new verification strategy that we prove to be exact. Our results on simulated and real genomic data confirm and quantify the conjecture that heuristic tools like BLAST or BLAT miss a large percentage of significant local alignments.
    Conclusions
    STELLAR is very practical and fast on very long sequences which makes it a suitable new tool for finding local alignments between genomic sequences under the edit distance model. Binaries are freely available for Linux, Windows, and Mac OS X at http://www.seqan.de/projects/stellar. The source code is freely distributed with the SeqAn C++ library version 1.3 and later at http://www.seqan.de.},
    url = {http://publications.imp.fu-berlin.de/1092/}
    }
  • T. Rausch and K. Reinert, “Practical multiple sequence alignment,” in Problem solving handbook in computational biology and bioinformatics, L. S. Heath and N. Ramakrishnan, Eds., Springer Science+Business Media, 2011, pp. 21-43.
    [Bibtex]
    @incollection{fu_mi_publications393,
    publisher = {Springer Science+Business Media},
    title = {Practical multiple Sequence alignment},
    editor = {L. S. Heath and N. Ramakrishnan},
    pages = {21--43},
    booktitle = {Problem Solving Handbook in Computational Biology and Bioinformatics},
    author = {T. Rausch and K. Reinert},
    year = {2011},
    url = {http://publications.imp.fu-berlin.de/393/},
    abstract = {Multiple sequence alignment as a means of comparing DNA, RNA or amino acid sequences is an essential precondition for various analyses, including structure prediction, modeling binding sites, phylogeny or function prediction. This range of applications implies a demand for versatile, flexible and specialized methods to compute accurate alignments. This chapter summarizes the key algorithmic insights gained in the past years to facilitate both an easy understanding of the current multiple sequence alignment literature and the ability of readers to use and apply current tools in their own everyday research.}
    }

2010

  • C. Bielow, S. Ruzek, C. Huber, and K. Reinert, “Optimal decharging and clustering of charge ladders generated in esi-ms,” J. proteome res., vol. 9, iss. 5, pp. 2688-2695, 2010.
    [Bibtex]
    @article{fu_mi_publications895,
    journal = {J. Proteome Res.},
    note = {online publication complete, printed publication to be expected},
    publisher = {ACS Publications},
    author = {C. Bielow and S. Ruzek and C. Huber and K. Reinert},
    number = {5},
    pages = {2688--2695},
    year = {2010},
    title = {Optimal Decharging and Clustering of Charge Ladders Generated in ESI-MS},
    month = {March},
    volume = {9},
    abstract = {In electrospray ionization mass spectrometry (ESI-MS), peptide and protein ions are usually observed in multiple charge states. Moreover, adduction of the multiply charged species with other ions frequently results in quite complex signal patterns for a single analyte, which significantly complicates the derivation of quantitative information from the mass spectra. Labeling strategies targeting the MS1 level further aggravate this situation, as multiple biological states such as healthy or diseased must be represented simultaneously. We developed an integer linear programming (ILP) approach, which can cluster signals belonging to the same peptide or protein. The algorithm is general in that it models all possible shifts of signals along the m/z axis. These shifts can be induced by different charge states of the compound, the presence of adducts (e.g., potassium or sodium), and/or a fixed mass label (e.g., from ICAT or nicotinic acid labeling), or any combination of the above. We show that our approach can be used to infer more features in labeled data sets, correct wrong charge assignments even in high-resolution MS, improve mass precision, and cluster charged species in different charge states and several adduct types.},
    url = {http://publications.imp.fu-berlin.de/895/}
    }
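    The clustering above exploits that every charge variant of one analyte must point back to the same neutral mass: a signal at m/z with charge z and adduct mass shift a implies M = z * (m/z) - z * m_p - a, with proton mass m_p of about 1.00728 Da. The hedged sketch below groups toy signals whose inferred neutral masses agree within a tolerance (the paper formulates this rigorously as an ILP; peak values and tolerances here are invented):

        #include <cmath>
        #include <iostream>
        #include <vector>

        const double PROTON = 1.007276;                      // Da
        const double SODIUM_SHIFT = 22.989769 - 1.007276;    // Na+ replacing H+

        int main() {
            // Observed peaks (m/z) of one peptide: charges 1-3 plus a sodium adduct.
            std::vector<double> mz = {925.46, 463.23, 309.16, 947.44};
            // Hypotheses: (charge, adduct mass shift) pairs to test per peak.
            struct Hyp { int z; double shift; };
            std::vector<Hyp> hyps = {{1, 0.0}, {2, 0.0}, {3, 0.0}, {1, SODIUM_SHIFT}};
            double tol = 0.05;  // Da, illustrative

            // For every pair of peaks, check whether some pair of hypotheses
            // yields the same neutral mass; if so, they join one cluster.
            for (std::size_t i = 0; i < mz.size(); ++i)
                for (std::size_t j = i + 1; j < mz.size(); ++j)
                    for (const Hyp& a : hyps)
                        for (const Hyp& b : hyps) {
                            double Ma = a.z * mz[i] - a.z * PROTON - a.shift;
                            double Mb = b.z * mz[j] - b.z * PROTON - b.shift;
                            if (std::fabs(Ma - Mb) < tol)
                                std::cout << mz[i] << " (z=" << a.z << ") and " << mz[j]
                                          << " (z=" << b.z << ") -> M = " << Ma << "\n";
                        }
        }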
  • M. Birn, M. Holtgrewe, P. Sanders, and J. Singler, “Simple and fast nearest neighbor search,” 2010 proceedings of the twelfth workshop on algorithm engineering and experiments (alenex), pp. 43-54, 2010.
    [Bibtex]
    @article{fu_mi_publications926,
    title = {Simple and Fast Nearest Neighbor Search},
    publisher = {ACM SIAM},
    journal = {2010 Proceedings of the Twelfth Workshop on Algorithm Engineering and Experiments (ALENEX)},
    year = {2010},
    author = {M. Birn and M. Holtgrewe and P. Sanders and J. Singler},
    pages = {43--54},
    url = {http://publications.imp.fu-berlin.de/926/},
    abstract = {We present a simple randomized data structure for two-dimensional point sets that allows fast nearest neighbor queries in many cases. An implementation outperforms several previous implementations for commonly used benchmarks.}
    }
  • A. -K. Emde, M. Grunert, D. Weese, K. Reinert, and S. R. Sperling, “Microrazers: rapid alignment of small rna reads,” Bioinformatics, vol. 26, iss. 1, pp. 123-124, 2010.
    [Bibtex]
    @article{fu_mi_publications792,
    journal = {Bioinformatics},
    publisher = {Oxford University Press},
    year = {2010},
    pages = {123--124},
    number = {1},
    author = {A.-K. Emde and M. Grunert and D. Weese and K. Reinert and S. R. Sperling},
    volume = {26},
    month = {January},
    title = {MicroRazerS: Rapid alignment of small RNA reads},
    url = {http://publications.imp.fu-berlin.de/792/},
    abstract = {Motivation: Deep sequencing has become the method of choice for determining the small RNA content of a cell. Mapping the sequenced reads onto their reference genome serves as the basis for all further analyses, namely for identification and quantification. A method frequently used is Mega BLAST followed by several filtering steps, even though it is slow and inefficient for this task. Also, none of the currently available short read aligners has established itself for the particular task of small RNA mapping. Results: We present MicroRazerS, a tool optimized for mapping small RNAs onto a reference genome. It is an order of magnitude faster than Mega BLAST and comparable in speed to other short read mapping tools. In addition, it is more sensitive and easy to handle and adjust. Availability: MicroRazerS is part of the SeqAn C++ library and can be downloaded from http://www.seqan.de/projects/MicroRazerS.html. Contact: emde@inf.fu-berlin.de, grunert@molgen.mpg.de}
    }
  • A. Gogol-Döring and K. Reinert, Biological sequence analysis using the seqan c++ library, Boca Raton, USA: CRC Press, 2010.
    [Bibtex]
    @book{fu_mi_publications825,
    title = {Biological Sequence Analysis using the SeqAn C++ Library},
    publisher = {CRC Press},
    series = {Chapman \& Hall/CRC Mathematical \& Computational Biology },
    author = {A. Gogol-D{\"o}ring and K. Reinert},
    number = {1},
    address = {Boca Raton, USA},
    year = {2010},
    abstract = {Before the SeqAn project, there was clearly a lack of available implementations in sequence analysis, even for standard tasks. Implementations of needed algorithmic components were either unavailable or hard to access in third-party monolithic software products. Addressing these concerns, the developers of SeqAn created a comprehensive, easy-to-use, open source C++ library of efficient algorithms and data structures for the analysis of biological sequences. Written by the founders of this project, Biological Sequence Analysis Using the SeqAn C++ Library covers the SeqAn library, its documentation, and the supporting infrastructure.
    The first part of the book describes the general library design. It introduces biological sequence analysis problems, discusses the benefit of using software libraries, summarizes the design principles and goals of SeqAn, details the main programming techniques used in SeqAn, and demonstrates the application of these techniques in various examples. Focusing on the components provided by SeqAn, the second part explores basic functionality, sequence data structures, alignments, pattern and motif searching, string indices, and graphs. The last part illustrates applications of SeqAn to genome alignment, consensus sequence in assembly projects, suffix array construction, and more.
    This handy book describes a user-friendly library of efficient data types and algorithms for sequence analysis in computational biology. SeqAn enables not only the implementation of new algorithms, but also the sound analysis and comparison of existing algorithms.
    },
    url = {http://publications.imp.fu-berlin.de/825/}
    }
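    As a flavor of the generic design the book describes, the classic Finder/Pattern idiom from the SeqAn tutorials: the same find() loop drives different online search algorithms, selected purely by the Pattern's template tag. A minimal example in the spirit of SeqAn 1.x (exact headers and tag names may differ between library versions):

        #include <iostream>
        #include <seqan/find.h>
        #include <seqan/sequence.h>
        using namespace seqan;

        int main() {
            String<char> haystack = "tobeornottobe";
            String<char> needle = "tobe";
            Finder<String<char> > finder(haystack);
            // Swapping Horspool for another tag (e.g. ShiftAnd) switches the
            // search algorithm without touching the surrounding code.
            Pattern<String<char>, Horspool> pattern(needle);
            while (find(finder, pattern))
                std::cout << "hit at position " << position(finder) << std::endl;
            return 0;
        }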
  • M. Holtgrewe, P. Sanders, and C. Schulz, “Engineering a scalable high quality graph partitioner,” 2010 ieee international symposium on parallel & distributed processing (ipdps), pp. 1-12, 2010.
    [Bibtex]
    @article{fu_mi_publications930,
    month = {April},
    journal = {2010 IEEE International Symposium on Parallel \& Distributed Processing (IPDPS)},
    title = {Engineering a scalable high quality graph partitioner},
    year = {2010},
    pages = {1--12},
    author = {M. Holtgrewe and P. Sanders and C. Schulz},
    abstract = {We describe an approach to parallel graph partitioning that scales to hundreds of processors and produces a high solution quality. For example, for many instances from Walshaw's benchmark collection we improve the best known partitioning. We use the well known framework of multi-level graph partitioning. All components are implemented by scalable parallel algorithms. Quality improvements compared to previous systems are due to better prioritization of edges to be contracted, better approximation algorithms for identifying matchings, better local search heuristics, and perhaps most notably, a parallelization of the FM local search algorithm that works more locally than previous approaches.},
    url = {http://publications.imp.fu-berlin.de/930/}
    }
  • M. Holtgrewe, “Mason – a read simulator for second generation sequencing data,” Technical report fu berlin, 2010.
    [Bibtex]
    @article{fu_mi_publications962,
    month = {October},
    journal = {Technical Report FU Berlin},
    title = {Mason -- A Read Simulator for Second Generation Sequencing Data},
    year = {2010},
    author = {M. Holtgrewe},
    abstract = {We present a read simulator software for Illumina, 454 and Sanger reads. Its features include position specific error rates and base quality values. For Illumina reads, we give a comprehensive analysis with empirical data for the error and quality model. For the other technologies, we use models from the literature. It has been written with performance in mind and can sample reads from large genomes. The C++ source code is extensible, and freely available under the GPL/LGPL.},
    url = {http://publications.imp.fu-berlin.de/962/}
    }
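    The position-specific error model mentioned in the abstract boils down to drawing, per read position, whether to substitute the base, with the rate typically rising toward the 3' end of Illumina-like reads. A hedged toy sampler (invented error curve, substitutions only; Mason proper also models indels and quality values):

        #include <iostream>
        #include <random>
        #include <string>

        int main() {
            std::mt19937 rng(42);
            std::string genome = "ACGTTTGACCAGTACGTACGATTACA";
            const char bases[] = "ACGT";
            std::size_t readLen = 12, numReads = 3;
            std::uniform_int_distribution<std::size_t> startDist(0, genome.size() - readLen);
            std::uniform_real_distribution<double> coin(0.0, 1.0);
            std::uniform_int_distribution<int> baseDist(0, 3);

            for (std::size_t r = 0; r < numReads; ++r) {
                // Sample a read start position, then mutate position by position.
                std::string read = genome.substr(startDist(rng), readLen);
                for (std::size_t i = 0; i < readLen; ++i) {
                    // Illustrative position-specific substitution rate: 0.5% at
                    // the 5' end rising linearly to 2.5% at the 3' end.
                    double errRate = 0.005 + 0.02 * i / (readLen - 1);
                    if (coin(rng) < errRate) {
                        char b;
                        do { b = bases[baseDist(rng)]; } while (b == read[i]);
                        read[i] = b;  // substitution error
                    }
                }
                std::cout << read << "\n";
            }
        }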
  • D. Hüser, A. Gogol-Döring, T. Lutter, S. Weger, K. Winter, E. -M. Hammer, T. Cathomen, K. Reinert, and R. Heilbronn, “Integration preferences of wildtype aav-2 for consensus rep-binding sites at numerous loci in the human genome,” Plos pathogens, vol. 6, iss. 7, p. e1000985, 2010.
    [Bibtex]
    @article{fu_mi_publications935,
    journal = {PLoS Pathogens},
    title = {Integration Preferences of Wildtype AAV-2 for Consensus Rep-Binding Sites at Numerous Loci in the Human Genome},
    volume = {6},
    month = {July},
    number = {7},
    pages = {e1000985},
    author = {D. H{\"u}ser and A. Gogol-D{\"o}ring and T. Lutter and S. Weger and K. Winter and E.-M. Hammer and T. Cathomen and K. Reinert and R. Heilbronn},
    year = {2010},
    abstract = {Adeno-associated virus type 2 (AAV) is known to establish latency by preferential integration in human chromosome 19q13.42. The AAV non-structural protein Rep appears to target a site called AAVS1 by simultaneously binding to Rep-binding sites (RBS) present on the AAV genome and within AAVS1. In the absence of Rep, as is the case with AAV vectors, chromosomal integration is rare and random. For a genome-wide survey of wildtype AAV integration a linker-selection-mediated (LSM)-PCR strategy was designed to retrieve AAV-chromosomal junctions. DNA sequence determination revealed wildtype AAV integration sites scattered over the entire human genome. The bioinformatic analysis of these integration sites compared to those of rep-deficient AAV vectors revealed a highly significant overrepresentation of integration events near to consensus RBS. Integration hotspots included AAVS1 with 10\% of total events. Novel hotspots near consensus RBS were identified on chromosome 5p13.3 denoted AAVS2 and on chromosome 3p24.3 denoted AAVS3. AAVS2 displayed seven independent junctions clustered within only 14 bp of a consensus RBS which proved to bind Rep in vitro similar to the RBS in AAVS3. Expression of Rep in the presence of rep-deficient AAV vectors shifted targeting preferences from random integration back to the neighbourhood of consensus RBS at hotspots and numerous additional sites in the human genome. In summary, targeted AAV integration is not as specific for AAVS1 as previously assumed. Rather, Rep targets AAV to integrate into open chromatin regions within reach of various consensus RBS homologues in the human genome.},
    url = {http://publications.imp.fu-berlin.de/935/}
    }

2009

  • R. A. Bauer, K. Rother, P. Moor, K. Reinert, T. Steinke, J. M. Bujnicki, and R. Preissner, “Fast structural alignment of biomolecules using a hash table, n-grams and string descriptors,” Algorithms and molecular sciences, vol. 2, iss. 2, pp. 692-709, 2009.
    [Bibtex]
    @article{fu_mi_publications450,
    year = {2009},
    author = {R. A. Bauer and K. Rother and P. Moor and K. Reinert and T. Steinke and J. M. Bujnicki and R. Preissner},
    pages = {692--709},
    number = {2},
    volume = {2},
    title = {Fast Structural Alignment of Biomolecules Using a Hash Table, N-Grams and String Descriptors},
    publisher = {MDPI},
    journal = {Algorithms and Molecular Sciences},
    url = {http://publications.imp.fu-berlin.de/450/},
    abstract = {This work presents a generalized approach for the fast structural alignment
    of thousands of macromolecular structures. The method uses string representations of a macromolecular structure and a hash table that stores n-grams of a certain size for searching.
    To this end, macromolecular structure-to-string translators were implemented for protein and RNA structures. A query against the index is performed in two hierarchical steps to unite speed and precision. In the first step the query structure is translated into n-grams, and all target structures containing these n-grams are retrieved from the hash table. In the second
    step all corresponding n-grams of the query and each target structure are subsequently aligned, and after each alignment a score is calculated based on the matching n-grams of query and target. The extendable framework enables the user to query and structurally align thousands of protein and RNA structures on a commodity machine and is available as
    open source from http://lajolla.sf.net.}
    }
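    The two-step search described above can be sketched with an inverted index from n-grams of the structure strings to the structures containing them; shared-n-gram counts then stand in for the alignment-based scoring of the second step (names and the three-letter structure alphabet below are illustrative):

        #include <iostream>
        #include <string>
        #include <unordered_map>
        #include <vector>

        // Step 1 of the described search: an inverted index mapping every n-gram
        // of each target's structure string to the target's id.
        std::unordered_map<std::string, std::vector<int>>
        buildIndex(const std::vector<std::string>& targets, std::size_t n) {
            std::unordered_map<std::string, std::vector<int>> index;
            for (int id = 0; id < (int)targets.size(); ++id)
                for (std::size_t i = 0; i + n <= targets[id].size(); ++i)
                    index[targets[id].substr(i, n)].push_back(id);
            return index;
        }

        int main() {
            // Structure strings, e.g. a secondary-structure alphabet (H/E/L).
            std::vector<std::string> targets = {"HHHHLLEEEE", "EEEELLHHHH", "LLLLHHHHLL"};
            std::size_t n = 4;
            auto index = buildIndex(targets, n);

            // Step 2, crudely: retrieve candidates sharing n-grams with the query
            // and count matches as a stand-in for the n-gram alignment score.
            std::string query = "HHHHLLEE";
            std::vector<int> score(targets.size(), 0);
            for (std::size_t i = 0; i + n <= query.size(); ++i) {
                auto hit = index.find(query.substr(i, n));
                if (hit != index.end())
                    for (int id : hit->second) ++score[id];
            }
            for (std::size_t id = 0; id < targets.size(); ++id)
                std::cout << "target " << id << ": " << score[id] << " shared n-grams\n";
        }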
  • T. Rausch, S. Koren, G. Denisov, D. Weese, A. -K. Emde, A. Döring, and K. Reinert, “A consistency-based consensus algorithm for de novo and reference-guided sequence assembly of short reads,” Bioinformatics, vol. 25, iss. 9, pp. 1118-1124, 2009.
    [Bibtex]
    @article{fu_mi_publications392,
    journal = {Bioinformatics},
    publisher = {Oxford University Press},
    title = {A consistency-based consensus algorithm for de novo and reference-guided sequence assembly of short reads},
    volume = {25},
    author = {T. Rausch and S. Koren and G. Denisov and D. Weese and A.-K. Emde and A. D{\"o}ring and K. Reinert},
    pages = {1118--1124},
    number = {9},
    year = {2009},
    abstract = {Motivation: Novel high-throughput sequencing technologies pose new algorithmic challenges in handling massive amounts of short-read, high-coverage data. A robust and versatile consensus tool is of particular interest for such data since a sound multi-read alignment is a prerequisite for variation analyses, accurate genome assemblies and insert sequencing. Results: A multi-read alignment algorithm for de novo or reference-guided genome assembly is presented. The program identifies segments shared by multiple reads and then aligns these segments using a consistency-enhanced alignment graph. On real de novo sequencing data obtained from the newly established NCBI Short Read Archive, the program performs similarly in quality to other comparable programs. On more challenging simulated datasets for insert sequencing and variation analyses, our program outperforms the other tools. Availability: The consensus program can be downloaded from http://www.seqan.de/projects/consensus.html. It can be used stand-alone or in conjunction with the Celera Assembler. Both application scenarios as well as the usage of the tool are described in the documentation. Contact: rausch@inf.fu-berlin.de},
    url = {http://publications.imp.fu-berlin.de/392/}
    }
  • D. Weese, A. -K. Emde, T. Rausch, A. Döring, and K. Reinert, “Razers – fast read mapping with sensitivity control,” Genome research, vol. 19, iss. 9, pp. 1646-1654, 2009.
    [Bibtex]
    @article{fu_mi_publications453,
    title = {RazerS - Fast Read Mapping with Sensitivity Control},
    volume = {19},
    month = {July},
    number = {9},
    pages = {1646--1654},
    author = {D. Weese and A.-K. Emde and T. Rausch and A. D{\"o}ring and K. Reinert},
    year = {2009},
    journal = {Genome Research},
    url = {http://publications.imp.fu-berlin.de/453/},
    abstract = {Second-generation sequencing technologies deliver DNA sequence data at unprecedented high throughput. Common to most biological applications is a mapping of the reads to an almost identical or highly similar reference genome. Due to the large amounts of data, efficient algorithms and implementations are crucial for this task. We present an efficient read mapping tool called RazerS. It allows the user to align sequencing reads of arbitrary length using either the Hamming distance or the edit distance. Our tool can work either lossless or with a user-defined loss rate at higher speeds. Given the loss rate, we present an approach that guarantees not to lose more reads than specified. This enables the user to adapt to the problem at hand and provides a seamless tradeoff between sensitivity and running time.}
    }
  • A. Zerck, E. Nordhoff, A. Reseman, E. Mirgorodskaya, D. Suckau, K. Reinert, H. Lehrach, and J. Gobom, “An iterative strategy for precursor ion selection for lc-ms/ms based shotgun proteomics,” Journal of proteome research, 2009.
    [Bibtex]
    @article{fu_mi_publications456,
    month = {April},
    publisher = {American Chemical Society},
    journal = {Journal of Proteome Research},
    title = {An iterative strategy for precursor ion selection for LC-MS/MS based shotgun proteomics},
    year = {2009},
    author = {A. Zerck and E. Nordhoff and A. Reseman and E. Mirgorodskaya and D. Suckau and K. Reinert and H. Lehrach and J. Gobom},
    abstract = {Currently, the precursor ion selection strategies in LC-MS mainly choose the most prominent peptide signals for MS/MS analysis. Consequently, high abundant proteins are identified by MS/MS of many peptides whereas proteins of lower abundance might elude identification. We present a novel, iterative and result-driven approach for precursor ion selection that significantly increases the efficiency of an MS/MS analysis by decreasing data redundancy and analysis time. By simulating different strategies for precursor ion selection on an existing dataset we compare our method to existing result-driven strategies and evaluate its performance with regard to mass accuracy, database size, and sample complexity.},
    url = {http://publications.imp.fu-berlin.de/456/}
    }

2008

  • M. Bauer, G. W. Klau, and K. Reinert, “An exact mathematical programming approach to multiple rna sequence-structure alignment.,” Algorithmic operations research, 2008.
    [Bibtex]
    @article{fu_mi_publications331,
    year = {2008},
    author = {M. Bauer and G. W. Klau and K. Reinert},
    title = {An Exact Mathematical Programming Approach to Multiple RNA Sequence-Structure  Alignment.},
    journal = {Algorithmic Operations Research},
    note = {to appear},
    url = {http://publications.imp.fu-berlin.de/331/}
    }
  • M. Bodirsky, C. Gröpl, and M. Kang, “Generating unlabeled connected cubic planar graphs uniformly at random,” Random structures and algorithms, vol. 32, iss. 2, pp. 157-180, 2008.
    [Bibtex]
    @article{fu_mi_publications338,
    volume = {32},
    journal = {Random Structures and Algorithms},
    title = {Generating unlabeled connected cubic planar graphs uniformly at random},
    year = {2008},
    pages = {157--180},
    number = {2},
    author = {M. Bodirsky and C. Gr{\"o}pl and M. Kang},
    abstract = {We present an expected polynomial time algorithm to generate an unlabeled  connected cubic planar graph uniformly at random. We first consider
    rooted connected cubic planar graphs, i.e., we count connected cubic
    planar graphs up to isomorphisms that fix a certain directed edge.
    Based on decompositions along the connectivity structure, we derive
    recurrence formulas for the exact number of rooted cubic planar graphs.
    This leads to rooted 3-connected cubic planar graphs, which have
    a unique embedding on the sphere. Special care has to be taken for
    rooted graphs that have a sense-reversing automorphism. Therefore
    we introduce the concept of colored networks, which stand in bijective
    correspondence to rooted 3-connected cubic planar graphs with given
    symmetries. Colored networks can again be decomposed along the connectivity
    structure. For rooted 3-connected cubic planar graphs embedded in
    the plane, we switch to the dual and count rooted triangulations.
    Since all these numbers can be evaluated in polynomial time using
    dynamic programming, rooted connected cubic planar graphs can be
    generated uniformly at random in polynomial time by inverting the
    decomposition along the connectivity structure. To generate connected
    cubic planar graphs without a root uniformly at random, we apply
    rejection sampling and obtain an expected polynomial time algorithm},
    url = {http://publications.imp.fu-berlin.de/338/}
    }
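
    The counting-then-sampling paradigm of this abstract (derive exact counting recurrences, then generate uniformly by inverting the decomposition) is easiest to see on a much simpler class. A toy Python sketch for uniformly random binary trees via the Catalan recurrence; this illustration is entirely ours, and the paper's recurrences run along the connectivity decomposition instead:

        import random
        from functools import lru_cache

        @lru_cache(maxsize=None)
        def count(n):
            """Number of binary trees with n internal nodes (Catalan numbers)."""
            if n == 0:
                return 1
            return sum(count(k) * count(n - 1 - k) for k in range(n))

        def sample(n):
            """Uniform sampling by inverting the recurrence: choose the left
            subtree size with probability proportional to its count."""
            if n == 0:
                return None
            r = random.randrange(count(n))
            for k in range(n):
                block = count(k) * count(n - 1 - k)
                if r < block:
                    return (sample(k), sample(n - 1 - k))
                r -= block

        print(sample(5))  # one of the 42 five-node trees, each with prob. 1/42
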
  • A. Döring, D. Weese, T. Rausch, and K. Reinert, “Seqan – an efficient, generic c++ library for sequence analysis,” Bmc bioinformatics, vol. 9, iss. 1, p. 11, 2008.
    [Bibtex]
    @article{fu_mi_publications455,
    journal = {BMC Bioinformatics},
    title = {SeqAn -- An efficient, generic C++ library for sequence analysis},
    volume = {9},
    pages = {11},
    number = {1},
    author = {A. D{\"o}ring and D. Weese and T. Rausch and K. Reinert},
    year = {2008},
    abstract = {BACKGROUND: The use of novel algorithmic techniques is pivotal to many important problems in life science. For example the sequencing of the human genome [1] would not have been possible without advanced assembly algorithms. However, owing to the high speed of technological progress and the urgent need for bioinformatics tools, there is a widening gap between state-of-the-art algorithmic techniques and the actual algorithmic components of tools that are in widespread use. RESULTS: To remedy this trend we propose the use of SeqAn, a library of efficient data types and algorithms for sequence analysis in computational biology. SeqAn comprises implementations of existing, practical state-of-the-art algorithmic components to provide a sound basis for algorithm testing and development. In this paper we describe the design and content of SeqAn and demonstrate its use by giving two examples. In the first example we show an application of SeqAn as an experimental platform by comparing different exact string matching algorithms. The second example is a simple version of the well-known MUMmer tool rewritten in SeqAn. Results indicate that our implementation is very efficient and versatile to use. CONCLUSION: We anticipate that SeqAn greatly simplifies the rapid development of new bioinformatics tools by providing a collection of readily usable, well-designed algorithmic components which are fundamental for the field of sequence analysis. This leverages not only the implementation of new algorithms, but also enables a sound analysis and comparison of existing algorithms.},
    url = {http://publications.imp.fu-berlin.de/455/}
    }
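
    The paper's first example, comparing exact string matching algorithms on a common platform, can be mimicked in a few lines. SeqAn itself is C++; the toy Python harness below (names and timing setup are ours, not SeqAn's API) contrasts a naive scan with a Horspool-style skip loop:

        import time

        def naive_find(text, pat):
            """Check every window; O(n*m) in the worst case."""
            m = len(pat)
            return [i for i in range(len(text) - m + 1) if text[i:i+m] == pat]

        def horspool_find(text, pat):
            """Boyer-Moore-Horspool: shift by the character aligned with
            the last pattern position."""
            m = len(pat)
            skip = {c: m - 1 - j for j, c in enumerate(pat[:-1])}
            hits, i = [], 0
            while i + m <= len(text):
                if text[i:i+m] == pat:
                    hits.append(i)
                i += skip.get(text[i + m - 1], m)
            return hits

        text, pat = "acgt" * 250000, "acgtacgtacgt"
        for find in (naive_find, horspool_find):
            t0 = time.perf_counter()
            hits = find(text, pat)
            print(find.__name__, len(hits), f"{time.perf_counter() - t0:.3f}s")
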
  • E. Lange, R. Tautenhahn, S. Neumann, and C. Gröpl, “Critical assessment of alignment procedures for lc-ms proteomics and metabolomic measurements,” Bmc bioinformatics, vol. 9, iss. 375, 2008.
    [Bibtex]
    @article{fu_mi_publications379,
    number = {375},
    author = {E. Lange and R. Tautenhahn and S. Neumann and C. Gr{\"o}pl},
    year = {2008},
    journal = {BMC Bioinformatics},
    title = {Critical assessment of alignment procedures for LC-MS proteomics  and metabolomic measurements},
    volume = {9},
    url = {http://publications.imp.fu-berlin.de/379/}
    }
  • T. Rausch, A. -K. Emde, and K. Reinert, “Robust consensus computation,” Bmc bioinformatics, vol. 9, iss. Suppl 10, p. P4, 2008.
    [Bibtex]
    @article{fu_mi_publications390,
    author = {T. Rausch and A.-K. Emde and K. Reinert},
    pages = {P4},
    number = {Suppl 10},
    year = {2008},
    title = {Robust consensus computation},
    journal = {BMC Bioinformatics},
    volume = {9},
    url = {http://publications.imp.fu-berlin.de/390/}
    }
  • T. Rausch, A. Thomas, N. J. Camp, and L. A. Facelli, “A parallel genetic algorithm to discover patterns in genetic markers that indicate predisposition to multifactorial disease,” Comput. biol. med., vol. 38, pp. 826-836, 2008.
    [Bibtex]
    @article{fu_mi_publications394,
    month = {July},
    volume = {38},
    title = {A parallel genetic algorithm to discover patterns in genetic markers  that indicate predisposition to multifactorial disease},
    journal = {Comput. Biol. Med.},
    year = {2008},
    author = {T. Rausch and A. Thomas and N. J. Camp and L. A. Facelli},
    pages = {826--836},
    abstract = {This paper describes a novel algorithm to analyze genetic linkage  data using pattern recognition techniques and genetic algorithms
    (GA). The method allows a search for regions of the chromosome that
    may contain genetic variations that jointly predispose individuals
    for a particular disease. The method uses correlation analysis, filtering
    theory and genetic algorithms to achieve this goal. Because current
    genome scans use from hundreds to hundreds of thousands of markers,
    two versions of the method have been implemented. The first is an
    exhaustive analysis version that can be used to visualize, explore,
    and analyze small genetic data sets for two marker correlations;
    the second is a GA version, which uses a parallel implementation
    allowing searches of higher-order correlations in large data sets.
    Results on simulated data sets indicate that the method can be informative
    in the identification of major disease loci and gene--gene interactions
    in genome-wide linkage data and that further exploration of these
    techniques is justified. The results presented for both variants
    of the method show that it can help genetic epidemiologists to identify
    promising combinations of genetic factors that might predispose to
    complex disorders. In particular, the correlation analysis of IBD
    expression patterns might hint to possible gene--gene interactions
    and the filtering might be a fruitful approach to distinguish true
    correlation signals from noise.},
    url = {http://publications.imp.fu-berlin.de/394/}
    }
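
    As a sketch of the search strategy described above (not the authors' parallel implementation, and with a made-up fitness function), a minimal genetic algorithm over fixed-size marker subsets looks like this in Python:

        import random

        def evolve(n_markers, fitness, subset=3, pop_size=40, gens=200):
            """Toy GA: individuals are subsets of marker indices; truncation
            selection, union crossover, single-point mutation."""
            pop = [random.sample(range(n_markers), subset) for _ in range(pop_size)]
            for _ in range(gens):
                pop.sort(key=fitness, reverse=True)
                pop = pop[:pop_size // 2]              # keep the fitter half
                while len(pop) < pop_size:
                    a, b = random.sample(pop[:pop_size // 2], 2)
                    child = random.sample(list(set(a) | set(b)), subset)
                    if random.random() < 0.1:          # mutate one locus
                        j = random.randrange(subset)
                        child[j] = random.choice(
                            [m for m in range(n_markers) if m not in child])
                    pop.append(child)
            return max(pop, key=fitness)

        # Hypothetical fitness: overlap with a planted causal marker trio.
        planted = {4, 17, 23}
        print(sorted(evolve(50, lambda s: len(planted & set(s)))))
        # typically recovers [4, 17, 23]
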
  • T. Rausch, A. -K. Emde, D. Weese, C. Notredame, and K. Reinert, “Segment-based multiple sequence alignment,” Bioinformatics, vol. 24, iss. 16, p. i187–192, 2008.
    [Bibtex]
    @article{fu_mi_publications391,
    author = {T. Rausch and A.-K. Emde and D. Weese and C. Notredame and K. Reinert},
    number = {16},
    pages = {i187--192},
    year = {2008},
    title = {Segment-based multiple sequence alignment},
    journal = {Bioinformatics},
    volume = {24},
    abstract = {Motivation: Many multiple sequence alignment tools have been developed  in the past, progressing either in speed or alignment accuracy. Given
    the importance and wide-spread use of alignment tools, progress in
    both categories is a contribution to the community and has driven
    research in the field so far. Results: We introduce a graph-based
    extension to the consistency-based, progressive alignment strategy.
    We apply the consistency notion to segments instead of single characters.
    The main problem we solve in this context is to define segments of
    the sequences in such a way that a graph-based alignment is possible.
    We implemented the algorithm using the SeqAn library and report results
    on amino acid and DNA sequences. The benefit of our approach is threefold:
    (1) sequences with conserved blocks can be rapidly aligned, (2) the
    implementation is conceptually easy, generic and fast and (3) the
    consistency idea can be extended to align multiple genomic sequences.
    Availability: The segment-based multiple sequence alignment tool
    can be downloaded from http://www.seqan.de/projects/msa.html. A novel
    version of T-Coffee interfaced with the tool is available from http://www.tcoffee.org.
    The usage of the tool is described in both documentations. Contact:
    rausch@inf.fu-berlin.de},
    url = {http://publications.imp.fu-berlin.de/391/}
    }
  • M. H. Schulz, D. Weese, T. Rausch, A. Döring, K. Reinert, and M. Vingron, “Fast and adaptive variable order markov chain construction,” in Proceedings of the 8th international workshop in algorithms in bioinformatics (wabi’08), 2008, pp. 306-317.
    [Bibtex]
    @inproceedings{fu_mi_publications401,
    pages = {306--317},
    author = {M. H. Schulz and D. Weese and T. Rausch and A. D{\"o}ring and K. Reinert and M. Vingron},
    booktitle = {Proceedings of the 8th International Workshop in Algorithms in Bioinformatics  (WABI'08)},
    year = {2008},
    publisher = {Springer Verlag},
    title = {Fast and Adaptive Variable Order Markov Chain Construction},
    abstract = { Variable order Markov chains (VOMCs) are a flexible class of models  that extend the well-known Markov chains. They have been applied
    to a variety of problems in computational biology, e.g. protein family
    classification. A linear time and space construction algorithm has
    been published in 2000 by Apostolico and Bejerano. However, neither
    a report of the actual running time nor an implementation of it have
    ever been published since. Using their theoretical results, we implement
    general and problem oriented algorithms considering recent advances
    in string matching. We introduce a new software which is orders of
    magnitudes faster than current tools for building VOMCs, and is suitable
    for large scale analysis. Along the way we show that the lazy suffix
    tree algorithm by Giegerich and others can compete with state-of-the-art
    suffix array methods in terms of time and space under the type of
    constraints we have analyzed in this work. },
    url = {http://publications.imp.fu-berlin.de/401/}
    }
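
    For readers unfamiliar with the model class itself: a variable-order Markov chain stores next-symbol statistics for contexts of varying length, pruning contexts with weak support so the effective order adapts to the data. A didactic brute-force sketch (ours; the point of the paper is to construct such models vastly faster, e.g. via lazy suffix trees):

        from collections import defaultdict

        def build_vomc(text, max_order=3, min_count=2):
            """Count next-symbol frequencies for every context up to
            max_order; keep only contexts seen at least min_count times."""
            counts = defaultdict(lambda: defaultdict(int))
            for order in range(max_order + 1):
                for i in range(len(text) - order):
                    counts[text[i:i + order]][text[i + order]] += 1
            return {c: dict(nxt) for c, nxt in counts.items()
                    if sum(nxt.values()) >= min_count}

        def next_symbol_counts(model, history):
            """Back off to the longest stored suffix of the history."""
            for k in range(len(history), -1, -1):
                ctx = history[len(history) - k:]
                if ctx in model:
                    return ctx, model[ctx]

        model = build_vomc("abracadabra")
        print(next_symbol_counts(model, "dabr"))  # ('abr', {'a': 2})
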
  • O. Schulz-Trieglaff, R. Hussong, C. Gröpl, A. Hildebrandt, A. Hildebrandt, C. Huber, and K. Reinert, “Computational quantification of peptides from lc-ms data,” Journal of computational biology, vol. 15, iss. 7, pp. 685-704, 2008.
    [Bibtex]
    @article{fu_mi_publications406,
    volume = {15},
    journal = {Journal of Computational Biology},
    title = {Computational Quantification of Peptides from LC-MS data},
    year = {2008},
    number = {7},
    pages = {685--704},
    author = {O. Schulz-Trieglaff and R. Hussong and C. Gr{\"o}pl and A. Hildebrandt and A. Hildebrandt and Ch. Huber and K. Reinert},
    url = {http://publications.imp.fu-berlin.de/406/},
    abstract = {Liquid chromatography coupled to mass spectrometry (LC-MS) has become
    a major tool for the study of biological processes. High-throughput
    LC-MS experiments are frequently conducted in modern laboratories,
    generating an enormous amount of data per day. A manual inspection
    is therefore no longer a feasible task. Consequently, there is a
    need for computational tools that can rapidly provide information
    about mass, elution time, and abundance of the compounds in a LC-MS
    sample. We present an algorithm for the detection and quantification
    of peptides in LC-MS data. Our approach is flexible and independent
    of the MS technology in use. It is based on a combination of the
    sweep line paradigm with a novel wavelet function tailored to detect
    isotopic patterns of peptides. We propose a simple voting schema to
    use the redundant information in consecutive scans for an accurate
    determination of monoisotopic masses and charge states. By explicitly
    modeling the instrument inaccuracy, we are also able to cope with
    data sets of different quality and resolution. We evaluate our
    technique on data from different instruments and show that we can
    rapidly estimate mass, centroid of retention time and abundance of
    peptides in a sound algorithmic framework. Finally, we compare the
    performance of our method to several other techniques on three data
    sets of varying complexity.},
    keywords = {computational mass spectrometry, liquid chromatography - massspectrometry,  quantification, wavelets}
    }
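
    One ingredient of the abstract, determining charge states from redundant peak information by voting, can be isolated in a few lines. A toy sketch (our own simplification, using the roughly 1.003 Da isotope spacing of peptides; the paper's actual detector is wavelet-based and aggregates evidence across consecutive scans):

        def vote_charge(mz_peaks, max_charge=4, tol=0.02):
            """Isotopic neighbours of a peptide with charge z are spaced
            about 1.003/z apart in m/z; let consecutive gaps vote for z."""
            ISOTOPE_GAP = 1.003  # approx. inter-isotope mass difference (Da)
            votes = {z: 0 for z in range(1, max_charge + 1)}
            for a, b in zip(mz_peaks, mz_peaks[1:]):
                for z in votes:
                    if abs((b - a) - ISOTOPE_GAP / z) < tol:
                        votes[z] += 1
            return max(votes, key=votes.get)

        # Peaks about 0.5 m/z apart vote for charge 2:
        print(vote_charge([500.27, 500.77, 501.27, 501.77]))  # -> 2
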
  • O. Schulz-Trieglaff, N. Pfeifer, C. Gröpl, O. Kohlbacher, and K. Reinert, “Lc-mssim – a simulation software for liquid chromatography mass spectrometry data,” Bmc bioinformatics, vol. 9, iss. 423, 2008.
    [Bibtex]
    @article{fu_mi_publications408,
    title = {LC-MSsim - a simulation software for liquid chromatography mass  spectrometry data},
    journal = {BMC Bioinformatics},
    volume = {9},
    author = {O. Schulz-Trieglaff and N. Pfeifer and C. Gr{\"o}pl and O. Kohlbacher and K. Reinert},
    number = {423},
    year = {2008},
    abstract = {BACKGROUND: Mass Spectrometry coupled to Liquid Chromatography (LC-MS)  is commonly used to analyze the protein content of biological samples
    in large scale studies. The data resulting from an LC-MS experiment
    is huge, highly complex and noisy. Accordingly, it has sparked new
    developments in Bioinformatics, especially in the fields of algorithm
    development, statistics and software engineering. In a quantitative
    label-free mass spectrometry experiment, crucial steps are the detection
    of peptide features in the mass spectra and the alignment of samples
    by correcting for shifts in retention time. At the moment, it is
    difficult to compare the plethora of algorithms for these tasks.
    So far, curated benchmark data exists only for peptide identification
    algorithms but no data that represents a ground truth for the evaluation
    of feature detection, alignment and filtering algorithms. RESULTS:
    We present LC-MSsim, a simulation software for LC-ESI-MS experiments.
    It simulates ESI spectra on the MS level. It reads a list of proteins
    from a FASTA file and digests the protein mixture using a user-defined
    enzyme. The software creates an LC-MS data set using a predictor
    for the retention time of the peptides and a model for peak shapes
    and elution profiles of the mass spectral peaks. Our software also
    offers the possibility to add contaminants, to change the background
    noise level and includes a model for the detectability of peptides
    in mass spectra. After the simulation, LC-MSsim writes the simulated
    data to public XML formats (mzXML or mzData). The software also stores
    the positions (monoisotopic m/z and retention time) and ion counts
    of the simulated ions in separate files. CONCLUSIONS: LC-MSsim generates
    simulated LC-MS data sets and incorporates models for peak shapes
    and contaminations. Algorithm developers can match the results of
    feature detection and alignment algorithms against the simulated
    ion lists and meaningful error rates can be computed. We anticipate
    that LC-MSsim will be useful to the wider community to perform benchmark
    studies and comparisons between computational tools.},
    keywords = {algorithm, benchmark, lc-ms-ms, massspec, metabolomics, proteomics},
    url = {http://publications.imp.fu-berlin.de/408/}
    }
  • M. Sturm, A. Bertsch, C. Gröpl, R. Hussong, E. Lange, N. Pfeifer, O. Schulz-Trieglaff, A. Zerck, K. Reinert, and O. Kohlbacher, “Openms – an open-source software framework for mass spectrometry,” Bmc bioinformatics, vol. 9, iss. 163, 2008.
    [Bibtex]
    @article{fu_mi_publications409,
    title = {OpenMS - An open-source software framework for mass spectrometry},
    journal = {BMC Bioinformatics},
    volume = {9},
    author = {M. Sturm and A. Bertsch and C. Gr{\"o}pl and R. Hussong and E. Lange and N. Pfeifer and O. Schulz-Trieglaff and A. Zerck and K. Reinert and O. Kohlbacher},
    number = {163},
    year = {2008},
    url = {http://publications.imp.fu-berlin.de/409/},
    keywords = {lc-ms-ms, massspec, proteomics},
    abstract = {BACKGROUND: Mass spectrometry is an essential analytical technique  for high-throughput analysis in proteomics and metabolomics. The
    development of new separation techniques, precise mass analyzers
    and experimental protocols is a very active field of research. This
    leads to more complex experimental setups yielding ever increasing
    amounts of data. Consequently, analysis of the data is currently
    often the bottleneck for experimental studies. Although software
    tools for many data analysis tasks are available today, they are
    often hard to combine with each other or not flexible enough to allow
    for rapid prototyping of a new analysis workflow. RESULTS: We present
    OpenMS, a software framework for rapid application development in
    mass spectrometry. OpenMS has been designed to be portable, easy-to-use
    and robust while offering a rich functionality ranging from basic
    data structures to sophisticated algorithms for data analysis. This
    has already been demonstrated in several studies. CONCLUSIONS: OpenMS
    is available under the Lesser GNU Public License (LGPL) from the
    project website at http://www.openms.de.}
    }
  • D. Weese and M. H. Schulz, “Efficient string mining under constraints via the deferred frequency index,” in Proceedings of the 8th industrial conference on data mining (icdm’08), 2008, pp. 374-388.
    [Bibtex]
    @inproceedings{fu_mi_publications412,
    publisher = {Springer Verlag},
    editor = {P. Perner},
    booktitle = {Proceedings of the 8th Industrial Conference on Data Mining (ICDM'08)},
    month = {July},
    title = {Efficient String Mining under Constraints via the Deferred Frequency  Index},
    year = {2008},
    pages = {374--388},
    author = {D. Weese and M. H. Schulz},
    url = {http://publications.imp.fu-berlin.de/412/},
    abstract = { We propose a general approach for frequency based string mining,  which has many applications, e.g. in contrast data mining. Our contribution
    is a novel algorithm based on a deferred data structure. Despite
    its simplicity, our approach is up to 4 times faster and uses about
    half the memory compared to the best-known algorithm of Fischer et
    al. Applications in various string domains, e.g. natural language,
    DNA or protein sequences, demonstrate the improvement of our algorithm.}
    }
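
    The underlying task, frequency-based (contrast) string mining, has a simple brute-force baseline that makes the problem concrete: report strings frequent in one collection and rare in another. A toy Python version restricted to q-grams (parameters and names are ours; the paper's deferred frequency index handles all substring lengths and large inputs efficiently):

        from collections import Counter

        def contrast_qgrams(pos, neg, q=3, min_pos=2, max_neg=0):
            """Document-frequency mining: q-grams occurring in at least
            min_pos positive sequences and at most max_neg negative ones."""
            def doc_freq(seqs):
                c = Counter()
                for s in seqs:
                    c.update(set(s[i:i + q] for i in range(len(s) - q + 1)))
                return c
            fpos, fneg = doc_freq(pos), doc_freq(neg)
            return sorted(g for g, f in fpos.items()
                          if f >= min_pos and fneg[g] <= max_neg)

        print(contrast_qgrams(["gattaca", "attacat"], ["gacgacg"]))
        # -> ['aca', 'att', 'tac', 'tta']
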

2007

  • M. Bauer, G. W. Klau, and K. Reinert, “Accurate multiple sequence-structure alignment of rna sequences using combinatorial optimization.,” Bmc bioinformatics, vol. 8, iss. 1, p. 271, 2007.
    [Bibtex]
    @article{fu_mi_publications332,
    journal = {BMC Bioinformatics},
    volume = {8},
    month = {July},
    title = {Accurate multiple sequence-structure alignment of RNA sequences using  combinatorial optimization.},
    year = {2007},
    pages = {271},
    number = {1},
    author = {M. Bauer and G. W. Klau and K. Reinert},
    url = {http://publications.imp.fu-berlin.de/332/}
    }
  • M. Bodirsky, C. Gröpl, D. Johannsen, and M. Kang, “A direct decomposition of 3-connected planar graphs,” Séminaire lotharingien de combinatoire, vol. B54Ak, p. 15 pages, 2007.
    [Bibtex]
    @article{fu_mi_publications336,
    volume = {B54Ak},
    title = {A direct decomposition of 3-connected planar graphs},
    journal = {S{\'e}minaire Lotharingien de Combinatoire},
    address = {Taormina},
    year = {2007},
    author = {M. Bodirsky and C. Gr{\"o}pl and D. Johannsen and M. Kang},
    pages = {15 pages},
    url = {http://publications.imp.fu-berlin.de/336/},
    keywords = {Three-Connected Planar Graph, Cnet, Planar Graph, Enumeration, Random  graphs, Random Generation, Dynamic Programming, Graph Theory}
    }
  • M. Bodirsky, C. Gröpl, and M. Kang, “Generating labeled planar graphs uniformly at random,” Theoretical computer science, vol. 379, iss. 3, pp. 377-386, 2007.
    [Bibtex]
    @article{fu_mi_publications339,
    journal = {Theoretical Computer Science},
    title = {Generating labeled planar graphs uniformly at random},
    volume = {379},
    number = {3},
    pages = {377--386},
    author = {M. Bodirsky and C. Gr{\"o}pl and M. Kang},
    year = {2007},
    url = {http://publications.imp.fu-berlin.de/339/},
    abstract = {We present a deterministic polynomial time algorithm to sample a labeled  planar graph uniformly at random. Our approach uses recursive formulae
    for the exact number of labeled planar graphs with n vertices and
    m edges, based on a decomposition into 1-, 2-, and 3-connected components.
    We can then use known sampling algorithms and counting formulae for
    3-connected planar graphs.},
    keywords = {Labeled planar graph, Enumeration, Decomposition, Sampling algorithm,  Dynamic programming, Graph theory}
    }
  • D. Fasulo, A. -K. Emde, L. -Y. Wang, and N. J. Edwards, “Alignment of mass spectrometry data by clique finding and optimization,” in Systems biology and computational proteomics, 2007, pp. 119-129.
    [Bibtex]
    @inproceedings{fu_mi_publications345,
    author = {D. Fasulo and A.-K. Emde and L.-Y. Wang and N. J. Edwards},
    pages = {119--129},
    year = {2007},
    title = {Alignment of Mass Spectrometry Data by Clique Finding and Optimization},
    series = {Lecture Notes in Computer Science},
    volume = {4532},
    booktitle = {Systems Biology and Computational Proteomics},
    publisher = {Springer Verlag},
    abstract = {Mass spectrometry (MS) is becoming a popular approach for quantifying  the protein composition of complex samples. A great challenge for
    comparative proteomic profiling is to match corresponding peptide
    features from different experiments to ensure that the same protein
    intensities are correctly identified. Multi-dimensional data acquisition
    from liquid-chromatography mass spectrometry (LC-MS) makes the alignment
    problem harder. We propose a general paradigm for aligning peptide
    features using a bounded error model. Our method is tolerant of imperfect
    measurements, missing peaks, and extraneous peaks. It can handle
    an arbitrary number of dimensions of separation, and is very fast
    in practice even for large data sets. Finally, its parameters are
    intuitive and we describe a heuristic for estimating them automatically.
    We demonstrate results on single- and multi-dimensional data.},
    url = {http://publications.imp.fu-berlin.de/345/}
    }
  • G. W. Klau, S. Rahmann, A. Schliep, M. Vingron, and K. Reinert, “Integer linear programming approaches for non-unique probe selection,” Discrete applied mathematics, vol. 155, pp. 840-856, 2007.
    [Bibtex]
    @article{fu_mi_publications371,
    volume = {155},
    title = {Integer Linear Programming Approaches for Non-unique Probe Selection},
    journal = {Discrete Applied Mathematics},
    year = {2007},
    author = {G. W. Klau and S. Rahmann and A. Schliep and M. Vingron and K. Reinert},
    pages = {840--856},
    url = {http://publications.imp.fu-berlin.de/371/}
    }
  • E. Lange, C. Gröpl, O. Schulz-Trieglaff, A. Leinenbach, C. Huber, and K. Reinert, “A geometric approach for the alignment of liquid chromatography-mass spectrometry data,” Bioinformatics, vol. 23, iss. 13, p. i273–i281, 2007.
    [Bibtex]
    @article{fu_mi_publications1133,
    author = {E. Lange and C. Gr{\"o}pl and O. Schulz-Trieglaff and A. Leinenbach and Ch. Huber and K. Reinert},
    pages = {i273--i281},
    number = {13},
    year = {2007},
    title = {A Geometric Approach for the Alignment of Liquid Chromatography-Mass  Spectrometry Data},
    volume = {23},
    booktitle = {Proceedings of the 15th Annual International Conference on Intelligent  Systems for Molecular Biology (ISMB) \& 6th European Conference on Computational Biology (ECCB)},
    journal = {Bioinformatics},
    publisher = {Oxford University Press},
    abstract = {Motivation: Liquid chromatography coupled to mass spectrometry (LC-MS) and combined with tandem mass spectrometry (LC-MS/MS) have become a prominent tool for the analysis of complex proteomic samples. An important step in a typical workflow is the combination of results from multiple LC-MS experiments to improve confidence in the obtained measurements or to compare results from different samples. To do so, a suitable mapping or alignment between the data sets needs to be estimated. The alignment has to correct for variations in mass and elution time which are present in all mass spectrometry experiments.
    Results: We propose a novel algorithm to align LC-MS samples and to match corresponding ion species across samples. Our algorithm matches landmark signals between two data sets using a geometric technique based on pose clustering. Variations in mass and retention time are corrected by an affine dewarping function estimated from matched landmarks. We use the pairwise dewarping in an algorithm for aligning multiple samples. We show that our pose clustering approach is fast and reliable as compared to previous approaches. It is robust in the presence of noise and able to accurately align samples with only few common ion species. In addition, we can easily handle different kinds of LC-MS data and adopt our algorithm to new mass spectrometry technologies.
    Availability: This algorithm is implemented as part of the OpenMS software library for shotgun proteomics and available under the Lesser GNU Public License (LGPL) at www.openms.de},
    url = {http://publications.imp.fu-berlin.de/1133/}
    }
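
    The pose-clustering step of this paper is easy to illustrate in one dimension: every pair of landmark times from each run proposes an affine map, the proposals vote in a discretized parameter space, and the densest cell wins. A toy reduction (the paper works in two dimensions, m/z and retention time, and refines the result further; grid size and names below are ours):

        from collections import Counter
        from itertools import combinations

        def estimate_dewarp(rt_a, rt_b, grid=0.05):
            """Vote for an affine map t -> m*t + c taking run A onto run B."""
            votes = Counter()
            for a1, a2 in combinations(sorted(rt_a), 2):
                for b1, b2 in combinations(sorted(rt_b), 2):
                    m = (b2 - b1) / (a2 - a1)
                    c = b1 - m * a1
                    votes[(round(m / grid), round(c / grid))] += 1
            (mq, cq), _ = votes.most_common(1)[0]
            return mq * grid, cq * grid

        # Run B equals run A stretched by 1.10 and shifted by +2.0,
        # plus one unmatched outlier signal:
        a = [10.0, 20.0, 30.0, 40.0]
        b = [13.0, 24.0, 35.0, 46.0, 57.5]
        print(estimate_dewarp(a, b))  # ~ (1.10, 2.0)
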
  • P. May, G. W. Klau, M. Bauer, and T. Steinke, “Accelerated microrna precursor detection using the smith-waterman algorithm on fpgas,” in Proc. of gccb 2006, 2007, pp. 19-32.
    [Bibtex]
    @inproceedings{fu_mi_publications384,
    title = {Accelerated microRNA Precursor Detection Using the Smith-Waterman  Algorithm on FPGAs},
    volume = {4360},
    series = {LNBI},
    pages = {19--32},
    booktitle = {Proc. of GCCB 2006},
    author = {P. May and G. W. Klau and M. Bauer and T. Steinke},
    year = {2007},
    url = {http://publications.imp.fu-berlin.de/384/}
    }
  • C. Meier, Bioinformatik: aktuelle proteinausstattung erlaubt aussagen über den gesundheitszustand. software hilft ärzten künftig bei der diagnose., 2007.
    [Bibtex]
    @misc{fu_mi_publications386,
    year = {2007},
    author = {Ch. Meier},
    month = {June},
    note = {This article, written by a freelance journalist, describes the use of OpenMS in proteomics},
    title = {Bioinformatik: Aktuelle Proteinausstattung erlaubt Aussagen  {\"u}ber den Gesundheitszustand. Software hilft {\"A}rzten k{\"u}nftig
    bei der Diagnose.},
    url = {http://publications.imp.fu-berlin.de/386/}
    }
  • K. Reinert, M. Bauer, A. Döring, and A. L. Halpern, “A general paradigm for fast and adaptive clustering of biological sequences,” in German conference on bioinformatics (gcb 2007), 2007, pp. 15-29.
    [Bibtex]
    @inproceedings{fu_mi_publications395,
    title = {A general paradigm for fast and adaptive clustering of biological  sequences},
    pages = {15--29},
    author = {K. Reinert and M. Bauer and A. D{\"o}ring and A. L. Halpern},
    booktitle = {German Conference on Bioinformatics (GCB 2007)},
    year = {2007},
    url = {http://publications.imp.fu-berlin.de/395/}
    }
  • K. Reinert and D. H. Huson, “Sequence assembly.” Weinheim: Wiley-VCH, 2007, vol. 1, pp. 25-55.
    [Bibtex]
    @incollection{fu_mi_publications396,
    publisher = {Wiley-VCH},
    series = {Bioinformatics - From Genomes to Therapies},
    volume = {1},
    month = {December},
    title = {Sequence Assembly},
    address = {Weinheim},
    year = {2007},
    pages = {25--55},
    author = {K. Reinert and D. H. Huson},
    url = {http://publications.imp.fu-berlin.de/396/}
    }
  • O. Schulz-Trieglaff, R. Hussong, C. Gröpl, A. Hildebrandt, and K. Reinert, “A fast and accurate algorithm for the quantification of peptides from mass spectrometry data,” in Proceedings of the eleventh annual international conference on research in computational molecular biology (recomb 2007), 2007, pp. 473-487.
    [Bibtex]
    @inproceedings{fu_mi_publications407,
    title = {A Fast and Accurate Algorithm for the Quantification of Peptides  from Mass Spectrometry data},
    year = {2007},
    booktitle = {Proceedings of the Eleventh Annual International Conference on Research  in Computational Molecular Biology (RECOMB 2007)},
    author = {O. Schulz-Trieglaff and R. Hussong and C. Gr{\"o}pl and A. Hildebrandt and K. Reinert},
    pages = {473--487},
    url = {http://publications.imp.fu-berlin.de/407/}
    }

2006

  • E. Althaus, A. Caprara, H. -P. Lenhof, and K. Reinert, “A branch-and-cut algorithm for multiple sequence alignment,” Mathematical programming, iss. 105, pp. 387-425, 2006.
    [Bibtex]
    @article{fu_mi_publications325,
    year = {2006},
    number = {105},
    pages = {387--425},
    author = {E. Althaus and A. Caprara and H.-P. Lenhof and K. Reinert},
    journal = {Mathematical Programming},
    title = {A Branch-and-Cut Algorithm for Multiple Sequence Alignment},
    url = {http://publications.imp.fu-berlin.de/325/}
    }
  • O. Kohlbacher, K. Reinert, C. Gröpl, E. Lange, N. Pfeifer, O. Schulz-Trieglaff, and M. Sturm, “Topp – the openms proteomics pipeline,” in Proceedings of the 5th european conference on computational biology (eccb 2006), 2006.
    [Bibtex]
    @inproceedings{fu_mi_publications375,
    year = {2006},
    author = {O. Kohlbacher and K. Reinert and C. Gr{\"o}pl and E. Lange and N. Pfeifer and O. Schulz-Trieglaff and M. Sturm},
    booktitle = {Proceedings of the 5th European Conference on Computational Biology  (ECCB 2006)},
    title = {TOPP - The OpenMS Proteomics Pipeline},
    abstract = {Motivation: Experimental techniques in proteomics have seen rapid  development over the last few years. Volume and complexity of the
    data have both been growing at a similar rate. Accordingly, data
    management and analysis are one of the major challenges in proteomics.
    Flexible algorithms are required to handle changing experimental
    setups and to assist in developing and validating new methods. In
    order to facilitate these studies, it would be desirable to have
    a flexible `toolbox' of versatile and user-friendly applications
    allowing for rapid construction of computational workflows in proteomics.
    Results: We describe a set of tools for proteomics data analysis
    -- TOPP, The OpenMS Proteomics Pipeline. TOPP provides a set of computational
    tools which can be easily combined into analysis pipelines even by
    non-experts and can be used in proteomics workflows. These applications
    range from useful utilities (file format conversion, peak picking)
    over wrapper applications for known applications (e.g. Mascot) to
    completely new algorithmic techniques for data reduction and data
    analysis. We anticipate that TOPP will greatly facilitate rapid prototyping
    of proteomics data evaluation pipelines. As such, we describe the
    basic concepts and the current abilities of TOPP and illustrate these
    concepts in the context of two example applications: the identification
    of peptides from a raw data set through database search and the complex
    analysis of a standard addition experiment for the absolute quantitation
    of biomarkers. The latter example demonstrates TOPP's ability to
    construct flexible analysis pipelines in support of complex experimental
    setups. Availability: The TOPP components are available as open-source
    software under the lesser GNU public license (LGPL). Source code
    is available from the project web site at www.OpenMS.de},
    url = {http://publications.imp.fu-berlin.de/375/}
    }
  • E. Lange, C. Gröpl, K. Reinert, and A. Hildebrandt, “High accuracy peak-picking of proteomics data using wavelet techniques,” in Proceedings of the 11th pacific symposium on biocomputing (psb-06), 2006, pp. 243-254.
    [Bibtex]
    @inproceedings{fu_mi_publications376,
    title = {High Accuracy Peak-Picking of Proteomics Data using Wavelet Techniques},
    year = {2006},
    booktitle = {Proceedings of the 11th Pacific Symposium on Biocomputing (PSB-06)},
    author = {E. Lange and C. Gr{\"o}pl and K. Reinert and A. Hildebrandt},
    pages = {243--254},
    url = {http://publications.imp.fu-berlin.de/376/}
    }
  • B. M. Mayr, O. Kohlbacher, K. Reinert, C. Gröpl, E. Lange, C. L. Klein, and C. Huber, “Absolute myoglobin quantitation in serum by combining two-dimensional liquid chromatography-electrospray ionization mass spectrometry and novel data analysis algorithms,” Journal of proteome research, vol. 5, pp. 414-421, 2006.
    [Bibtex]
    @article{fu_mi_publications385,
    pages = {414--421},
    author = {B. M. Mayr and O. Kohlbacher and K. Reinert and C. Gr{\"o}pl and E. Lange and C. L. Klein and Ch. Huber},
    year = {2006},
    journal = {Journal of Proteome Research},
    title = {Absolute Myoglobin Quantitation in Serum by Combining Two-Dimensional  Liquid Chromatography-Electrospray Ionization Mass Spectrometry and
    Novel Data Analysis Algorithms},
    volume = {5},
    url = {http://publications.imp.fu-berlin.de/385/}
    }
  • T. Rausch, Discovering causes of multifactorial diseases, 2006.
    [Bibtex]
    @unpublished{fu_mi_publications389,
    year = {2006},
    author = {T. Rausch},
    school = {Hasso-Plattner-Institut f{\"u}r Softwaresystemtechnik GmbH, Universit{\"a}t  Potsdam},
    month = {September},
    title = {Discovering causes of multifactorial diseases},
    url = {http://publications.imp.fu-berlin.de/389/}
    }
  • K. Reinert, O. Kohlbacher, C. Gröpl, E. Lange, O. Schulz-Trieglaff, M. Sturm, and N. Pfeifer, “Openms – a framework for quantitative hplc/ms-based proteomics,” in Computational proteomics, 2006.
    [Bibtex]
    @inproceedings{fu_mi_publications397,
    number = {05471},
    author = {K. Reinert and O. Kohlbacher and C. Gr{\"o}pl and E. Lange and O. Schulz-Trieglaff and M. Sturm and N. Pfeifer},
    year = {2006},
    title = {OpenMS - A Framework for Quantitative HPLC/MS-Based Proteomics},
    series = {Dagstuhl Seminar Proceedings},
    booktitle = {Computational Proteomics},
    note = {http://drops.dagstuhl.de/opus/volltexte/2006/546 [date of citation:  2006-01-01]},
    publisher = {Internationales Begegnungs- und Forschungszentrum f{\"u}r Informatik  (IBFI), Schloss Dagstuhl, Germany},
    editor = {C. G. Huber and O. Kohlbacher and K. Reinert},
    url = {http://publications.imp.fu-berlin.de/397/}
    }
  • D. Weese, “Entwurf und implementierung eines generischen substring-index,” PhD Thesis, 2006.
    [Bibtex]
    @phdthesis{fu_mi_publications457,
    month = {May},
    school = {Humboldt-University},
    title = {Entwurf und Implementierung eines generischen Substring-Index},
    year = {2006},
    author = {D. Weese},
    url = {http://publications.imp.fu-berlin.de/457/}
    }
  • W. E. Wolski, M. Farrow, A. -K. Emde, M. Lalowski, H. Lehrach, and K. Reinert, “Analytical model of peptide mass cluster centres with applications,” Proteome science, vol. 4, iss. 18, p. doi:10.1186/1477–5956, 2006.
    [Bibtex]
    @article{fu_mi_publications415,
    journal = {Proteome Science},
    title = {Analytical model of peptide mass cluster centres with applications},
    volume = {4},
    number = {18},
    pages = {doi:10.1186/1477--5956},
    author = {W. E. Wolski and M. Farrow and A.-K. Emde and M. Lalowski and H. Lehrach and K. Reinert},
    year = {2006},
    url = {http://publications.imp.fu-berlin.de/415/}
    }

2005

  • V. Bafna and K. Reinert, “Mass spectrometry and computational proteomics,” in Encyclopedia of genetics, genomics, proteomics and bioinformatics, Wiley-Eastern, 2005.
    [Bibtex]
    @incollection{fu_mi_publications327,
    author = {V. Bafna and K. Reinert},
    booktitle = {Encyclopedia of Genetics, Genomics, Proteomics and Bioinformatics},
    year = {2005},
    title = {Mass Spectrometry and Computational Proteomics},
    publisher = {Wiley-Eastern},
    url = {http://publications.imp.fu-berlin.de/327/}
    }
  • M. Bauer, G. W. Klau, and K. Reinert, “Fast and accurate structural rna alignment by progressive lagrangian relaxation,” in Proceedings of the 1st international symposium on computational life science (complife-05), 2005, pp. 217-228.
    [Bibtex]
    @inproceedings{fu_mi_publications334,
    year = {2005},
    pages = {217--228},
    author = {M. Bauer and G. W. Klau and K. Reinert},
    booktitle = {Proceedings of the 1st International Symposium on Computational Life  Science (CompLife-05)},
    title = {Fast and Accurate Structural RNA Alignment by Progressive Lagrangian  Relaxation},
    url = {http://publications.imp.fu-berlin.de/334/}
    }
  • M. Bauer, G. W. Klau, and K. Reinert, “Multiple structural rna alignment with lagrangian relaxation,” in Proceedings of the 5th workshop on algorithms bioinformatics (wabi-05), 2005, pp. 303-314.
    [Bibtex]
    @inproceedings{fu_mi_publications333,
    pages = {303--314},
    booktitle = {Proceedings of the 5th Workshop on Algorithms Bioinformatics (WABI-05)},
    author = {M. Bauer and G. W. Klau and K. Reinert},
    year = {2005},
    title = {Multiple Structural RNA Alignment with Lagrangian Relaxation},
    url = {http://publications.imp.fu-berlin.de/333/}
    }
  • M. Bodirsky, C. Gröpl, D. Johannsen, and M. Kang, “A direct decomposition of 3-connected planar graphs,” in Proceedings of the 17th annual international conference on formal power series and algebraic combinatorics (fpsac05), Taormina, 2005.
    [Bibtex]
    @inproceedings{fu_mi_publications337,
    address = {Taormina},
    year = {2005},
    booktitle = {Proceedings of the 17th Annual International Conference on Formal  Power Series and Algebraic Combinatorics (FPSAC05)},
    author = {M. Bodirsky and C. Gr{\"o}pl and D. Johannsen and M. Kang},
    title = {A direct decomposition of 3-connected planar graphs},
    url = {http://publications.imp.fu-berlin.de/337/},
    keywords = {Three-Connected Planar Graph, Cnet, Planar Graph, Enumeration, Random  graphs, Random Generation, Dynamic Programming, Graph Theory}
    }
  • M. Bodirsky, C. Gröpl, and M. Kang, “Sampling unlabeled biconnected planar graphs,” in Proceedings of the 16th annual international symposium on algorithms and computation (isaac05), 2005.
    [Bibtex]
    @inproceedings{fu_mi_publications340,
    title = {Sampling Unlabeled Biconnected Planar Graphs},
    author = {M. Bodirsky and C. Gr{\"o}pl and M. Kang},
    booktitle = {Proceedings of the 16th Annual International Symposium on Algorithms  and Computation (ISAAC05)},
    year = {2005},
    url = {http://publications.imp.fu-berlin.de/340/},
    keywords = {Planar Graph, Enumeration, Random graphs, Random Generation, Graph  Theory}
    }
  • C. Gröpl, “An algorithm for feature finding in lc/ms raw data,” in Computational proteomics, 2005.
    [Bibtex]
    @inproceedings{fu_mi_publications347,
    note = {Extended abstract for talk given at Dagstuhl Seminar 05471 on Computational  Proteomics, 20.-25. November 2005},
    publisher = {Dagstuhl Online Publication Server (DROPS)},
    title = {An Algorithm for Feature Finding in LC/MS Raw Data},
    year = {2005},
    booktitle = {Computational Proteomics},
    author = {C. Gr{\"o}pl},
    url = {http://publications.imp.fu-berlin.de/347/}
    }
  • C. Gröpl, A. Hildebrandt, E. Lange, S. Lövenich, and M. Sturm, Openms – software for mass spectrometry, 2005.
    [Bibtex]
    @misc{fu_mi_publications351,
    note = {http://mbi.osu.edu/2004/workshops2004.html},
    title = {OpenMS - Software for Mass Spectrometry},
    author = {C. Gr{\"o}pl and A. Hildebrandt and E. Lange and S. L{\"o}venich and M. Sturm},
    year = {2005},
    url = {http://publications.imp.fu-berlin.de/351/},
    keywords = {mass spectrometry, proteomics, open source software}
    }
  • C. Gröpl, A. Hildebrandt, E. Lange, and M. Sturm, Openms – a generic open source framework for hplc/ms-based proteomics, 2005.
    [Bibtex]
    @misc{fu_mi_publications352,
    title = {OpenMS - a generic open source framework for HPLC/MS-based proteomics},
    note = {http://www.hupo2005.com/},
    author = {C. Gr{\"o}pl and A. Hildebrandt and E. Lange and M. Sturm},
    year = {2005},
    keywords = {mass spectrometry, proteomics, open source software},
    url = {http://publications.imp.fu-berlin.de/352/}
    }
  • C. Gröpl, E. Lange, K. Reinert, M. Sturm, C. Huber, B. M. Mayr, and C. L. Klein, “Algorithms for the automated absolute quantification of diagnostic markers in complex proteomics samples,” in Proceedings of the 1st international symposium on computational life science (complife05), 2005, pp. 151-163.
    [Bibtex]
    @inproceedings{fu_mi_publications358,
    pages = {151--163},
    booktitle = {Proceedings of the 1st International Symposium on Computational Life  Science (CompLife05)},
    author = {C. Gr{\"o}pl and E. Lange and K. Reinert and M. Sturm and Ch. Huber and B. M. Mayr and C. L. Klein},
    year = {2005},
    title = {Algorithms for the automated absolute quantification of diagnostic  markers in complex proteomics samples},
    abstract = {HPLC-ESI-MS is rapidly becoming an established standard method for  shotgun proteomics. Currently, its major drawbacks are twofold: quantification
    is mostly limited to relative quantification and the large amount
    of data produced by every individual experiment can make manual analysis
    quite difficult. Here we present a new, combined experimental and
    algorithmic approach to absolutely quantify proteins from samples
    with unprecedented precision. We apply the method to the analysis
    of myoglobin in human blood serum, which is an important diagnostic
    marker for myocardial infarction. Our approach was able to determine
    the absolute amount of myoglobin in a serum sample through a series
    of standard addition experiments with a relative error of 2.5 percent.
    Compared to a manual analysis of the same dataset we could improve
    the precision and conduct it in a fraction of the time needed for
    the manual analysis. We anticipate that our automatic quantitation
    method will facilitate further absolute or relative quantitation
    of even more complex peptide samples. The algorithm was developed
    using our publically available software framework OpenMS (www.openms.de).},
    url = {http://publications.imp.fu-berlin.de/358/}
    }
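
    The experimental design in this abstract, standard addition, reduces to a short calculation once feature intensities have been extracted: the signal is (ideally) linear in the spiked amount, and extrapolating the fitted line back to zero signal yields the native concentration. A minimal sketch with made-up numbers (the paper automates the feature extraction that feeds such a fit):

        def standard_addition(spiked, measured):
            """Least-squares line y = a + b*x; the native concentration is
            the magnitude of the x-intercept, i.e. a/b (in spike units)."""
            n = len(spiked)
            mx, my = sum(spiked) / n, sum(measured) / n
            b = (sum((x - mx) * (y - my) for x, y in zip(spiked, measured))
                 / sum((x - mx) ** 2 for x in spiked))
            a = my - b * mx
            return a / b

        # Spiking 0, 1, 2 units raises the signal linearly from 3.0:
        print(standard_addition([0, 1, 2], [3.0, 4.0, 5.0]))  # -> 3.0
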
  • C. L. Klein, O. Kohlbacher, and K. Reinert, “Reference methods and materials in standardisation and quality assurance (abstract),” Febs journal, vol. 272, iss. Supplement 1, pp. 490-504, 2005.
    [Bibtex]
    @article{fu_mi_publications373,
    author = {C. L. Klein and O. Kohlbacher and K. Reinert},
    pages = {490--504},
    number = {Supplement 1},
    year = {2005},
    title = {Reference methods and materials in standardisation and quality assurance  (abstract)},
    journal = {FEBS Journal},
    volume = {272},
    url = {http://publications.imp.fu-berlin.de/373/}
    }
  • E. Lange, C. Gröpl, K. Reinert, and A. Hildebrandt, “High-accuracy peak picking of proteomics data,” in Computational proteomics, 2005.
    [Bibtex]
    @inproceedings{fu_mi_publications377,
    title = {High-accuracy peak picking of proteomics data},
    publisher = {Dagstuhl Online Publication Server (DROPS)},
    note = {Extended abstract for talk given at Dagstuhl Seminar 05471 on Computational  Proteomics, 20.-25. November 2005},
    author = {E. Lange and C. Gr{\"o}pl and K. Reinert and A. Hildebrandt},
    booktitle = {Computational Proteomics},
    year = {2005},
    url = {http://publications.imp.fu-berlin.de/377/}
    }
  • K. Reinert, O. Kohlbacher, C. Gröpl, E. Lange, O. Schulz-Trieglaff, M. Sturm, and N. Pfeifer, “Openms – a framework for quantitative hplc/ms-based proteomics,” in Computational proteomics, 2005.
    [Bibtex]
    @inproceedings{fu_mi_publications398,
    title = {OpenMS - A Framework for Quantitative HPLC/MS-Based Proteomics},
    publisher = {Dagstuhl Online Publication Server (DROPS)},
    note = {Extended abstract for talk given at Dagstuhl Seminar 05471 on Computational  Proteomics, 20.-25. November 2005},
    author = {K. Reinert and O. Kohlbacher and C. Gr{\"o}pl and E. Lange and O. Schulz-Trieglaff and M. Sturm and N. Pfeifer},
    booktitle = {Computational Proteomics},
    year = {2005},
    url = {http://publications.imp.fu-berlin.de/398/}
    }
  • O. Schulz-Trieglaff, Modelling the randomness in biological systems, 2005.
    [Bibtex]
    @misc{fu_mi_publications402,
    year = {2005},
    author = {O. Schulz-Trieglaff},
    title = {Modelling the Randomness in Biological Systems},
    keywords = {petri nets, Gillespie algorithm},
    url = {http://publications.imp.fu-berlin.de/402/}
    }
  • O. Schulz-Trieglaff, “Software platforms for computational proteomics,” in Computational proteomics, 2005.
    [Bibtex]
    @inproceedings{fu_mi_publications403,
    note = {Extended abstract for talk given at Dagstuhl Seminar 05471 on Computational  Proteomics, 20.-25. November 2005},
    publisher = {Dagstuhl Online Publication Server (DROPS)},
    title = {Software Platforms for Computational Proteomics},
    year = {2005},
    author = {O. Schulz-Trieglaff},
    booktitle = {Computational Proteomics},
    url = {http://publications.imp.fu-berlin.de/403/}
    }
  • E. Wolski, M. Lalowski, P. Martus, P. Giavalisco, J. Gobom, A. Sickmann, H. Lehrach, and K. Reinert, “Transformation and other factors of the peptide mass spectrometry pairwise peaklist comparison process,” Bmc bioinformatics, vol. 6, iss. 285, p. http://www.biomedcentral.com/1471–2105/6/285, 2005.
    [Bibtex]
    @article{fu_mi_publications414,
    year = {2005},
    author = {E. Wolski and M. Lalowski and P. Martus and P. Giavalisco and J. Gobom and A. Sickmann and H. Lehrach and K. Reinert},
    pages = {http://www.biomedcentral.com/1471--2105/6/285},
    number = {285},
    volume = {6},
    title = {Transformation and other factors of the peptide mass spectrometry  pairwise peaklist comparison process},
    journal = {BMC Bioinformatics},
    url = {http://publications.imp.fu-berlin.de/414/}
    }
  • W. E. Wolski, M. Lalowski, P. Jungblut, and K. Reinert, “Calibration of mass spectrometric peptide mass fingerprint data without specific external or internal calibrants,” Bmc bioinformatics, vol. 6, iss. 203, p. http://www.biomedcentral.com/1471–2105/6/203, 2005.
    [Bibtex]
    @article{fu_mi_publications413,
    title = {Calibration of mass spectrometric peptide mass fingerprint data without  specific external or internal calibrants},
    journal = {BMC Bioinformatics},
    volume = {6},
    author = {W. E. Wolski and M. Lalowski and P. Jungblut and K. Reinert},
    number = {203},
    pages = {http://www.biomedcentral.com/1471--2105/6/203},
    year = {2005},
    url = {http://publications.imp.fu-berlin.de/413/}
    }

2004

  • M. Bauer and G. W. Klau, “Structural alignment of two rna sequences with lagrangian relaxation,” in Proceedings of the 15th international symposium, isaac 2004, hong kong, 2004, pp. 113-125.
    [Bibtex]
    @inproceedings{fu_mi_publications330,
    title = {Structural Alignment of Two RNA Sequences with Lagrangian Relaxation},
    publisher = {Springer Verlag},
    series = {LNCS 3341},
    author = {M. Bauer and G. W. Klau},
    booktitle = {Proceedings of the 15th International Symposium, ISAAC 2004, Hong  Kong},
    pages = {113--125},
    year = {2004},
    url = {http://publications.imp.fu-berlin.de/330/}
    }
  • C. Gröpl, H. -J. Prömel, and A. Srivastav, “Ordered binary decision diagrams and the shannon effect,” Discrete applied mathematics, vol. 142, pp. 67-85, 2004.
    [Bibtex]
    @article{fu_mi_publications360,
    journal = {Discrete Applied Mathematics},
    title = {Ordered binary decision diagrams and the Shannon effect},
    volume = {142},
    pages = {67--85},
    author = {C. Gr{\"o}pl and H.-J. Pr{\"o}mel and A. Srivastav},
    year = {2004},
    url = {http://publications.imp.fu-berlin.de/360/},
    keywords = {VLSI-Design and layout, Binary Decision Diagrams, Graph theory (other),  Hardware verification, Probability theory, Random graphs, Randomized
    algorithms and probabilistic analysis, Theoretical computer science
    (other)},
    abstract = {We investigate the size and structure of ordered binary decision diagrams  (OBDDs) for random Boolean functions. It was known that for most
    values of n, the expected OBDD size of a random Boolean function
    with n variables is equal to the worst-case size up to terms of lower
    order. Such a phenomenon is generally called strong Shannon effect.
    Here we show that the strong Shannon effect is not valid for all
    n. Instead it undergoes a certain periodic `phase transition': If
    n lies within intervals of constant width around the values n = 2^h + h,
    then the strong Shannon effect does not hold, whereas it does
    hold outside these intervals. Our analysis provides doubly exponential
    probability bounds and generalises to ordered Kronecker functional
    decision diagrams (OKFDDs). }
    }
  • S. Istrail, G. G. Sutton, L. Florea, C. M. Mobarry, R. Lippert, B. Walenz, H. Shatkay, I. Dew, J. R. Miller, M. J. Flanigan, N. J. Edwards, R. Bolanos, D. Fasulo, B. V. Halldorsson, S. Hannenhalli, R. J. Turner, S. Yooseph, F. Lu, D. R. Nusskern, B. C. Shue, X. H. Zheng, F. Zhong, A. L. Delcher, D. H. Huson, S. A. Kravitz, L. Mouchard, K. Reinert, K. A. Remington, A. G. Clark, M. S. Waterman, E. E. Eichler, M. D. Adams, M. W. Hunkapiller, E. W. Myers, and J. C. Venter, “Whole-genome shotgun assembly and comparison of human genome assemblies,” Proceedings of the national academy of science (pnas), vol. 101, iss. 7, pp. 1916-1921, 2004.
    [Bibtex]
    @article{fu_mi_publications369,
    journal = {Proceedings of the national academy of science (PNAS)},
    title = {Whole-genome shotgun assembly and comparison of human genome assemblies},
    volume = {101},
    number = {7},
    pages = {1916--1921},
    author = {S. Istrail and G. G. Sutton and L. Florea and C. M. Mobarry and R. Lippert and B. Walenz and H. Shatkay and Ian Dew and J. R. Miller and M. J. Flanigan and N. J. Edwards and R. Bolanos and D. Fasulo and B. V. Halldorsson and S. Hannenhalli and R. J. Turner and S. Yooseph and Fu Lu and D. R. Nusskern and B. C. Shue and X. H. Zheng and F. Zhong and A. L. Delcher and D. H. Huson and S. A. Kravitz and L. Mouchard and K. Reinert and K. A. Remington and A. G. Clark and M. S. Waterman and E. E. Eichler and M. D. Adams and M. W. Hunkapiller and E. W. Myers and J. C. Venter},
    year = {2004},
    url = {http://publications.imp.fu-berlin.de/369/}
    }
  • G. W. Klau, S. Rahmann, A. Schliep, and K. Reinert, “Optimal robust non-unique probe selection using integer linear programming,” in Proceedings of the twelfth international conference on intelligent systems for molecular biology (ismb-04), 2004, pp. 186-193.
    [Bibtex]
    @inproceedings{fu_mi_publications372,
    title = {Optimal Robust Non-Unique Probe Selection Using Integer Linear Programming},
    pages = {186--193},
    author = {G. W. Klau and S. Rahmann and A. Schliep and K. Reinert},
    booktitle = {Proceedings of the Twelfth International Conference on Intelligent  Systems for Molecular Biology (ISMB-04)},
    year = {2004},
    url = {http://publications.imp.fu-berlin.de/372/}
    }
  • O. Kohlbacher and K. Reinert, “Differenzielle proteomanalyse — experimentelle methoden, algorithmische herausforderungen,” It — information technology, vol. 46, pp. 31-38, 2004.
    [Bibtex]
    @article{fu_mi_publications374,
    pages = {31--38},
    author = {O. Kohlbacher and K. Reinert},
    year = {2004},
    journal = {it -- Information technology},
    title = {Differenzielle Proteomanalyse -- Experimentelle Methoden, Algorithmische  Herausforderungen},
    volume = {46},
    url = {http://publications.imp.fu-berlin.de/374/}
    }
  • E. Lange, Peak picking in mass spectra, 2004.
    [Bibtex]
    @misc{fu_mi_publications344,
    title = {Peak picking in mass spectra},
    year = {2004},
    author = {E. Lange},
    url = {http://publications.imp.fu-berlin.de/344/},
    abstract = {High throughput analysis of proteins using mass spectrometry requires  an efficient signal preprocessing which reduces the amount of data
    with minimal loss of information. Therefore the peaks that belong
    to a peptide have to be detected, and important features like their
    peak centroid position, the height and the area have to be determined.
    Here we present a peak picking algorithm which detects peaks in a
    noisy mass spectrum using its continuous wavelet transform. By adapting
    the wavelet to the theoretical peak shape, each peak's centroid position
    is precisely identified; thus, overlapping peaks (e.g. caused by
    electrospray ionization) are well separated. Representing each peak
    not only by its centroid position and area but also by the parameters
    of the best fitting asymmetric lorentzian or hyperbolic secant squared
    functions, yields valuable information about the original peak shape
    for further analysis. Given a mass spectrum with 100,000 raw data
    points representing 200 peaks, our algorithm stored 5 parameters
    per peak, and was able to reduce the memory requirement down to 1
    percent.}
    }
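
    The wavelet idea in this abstract can be sketched compactly: correlate the raw spectrum with a peak-shaped wavelet and keep local maxima of the response. A toy version with a Mexican-hat wavelet (the described method adapts the wavelet to the theoretical peak shape and then fits peak models; scale and threshold here are arbitrary choices of ours):

        import math

        def mexican_hat(t):
            return (1 - t * t) * math.exp(-t * t / 2)

        def cwt_peaks(signal, scale=2.0, threshold=1.0):
            """Correlate with a Mexican-hat kernel at one scale and return
            indices that are local maxima of the response."""
            half = int(4 * scale)
            kernel = [mexican_hat(k / scale) for k in range(-half, half + 1)]
            resp = []
            for i in range(len(signal)):
                acc = 0.0
                for k, w in enumerate(kernel, start=-half):
                    if 0 <= i + k < len(signal):
                        acc += w * signal[i + k]
                resp.append(acc)
            return [i for i in range(1, len(signal) - 1)
                    if resp[i] > threshold
                    and resp[i] >= resp[i - 1] and resp[i] > resp[i + 1]]

        # Two overlapping Gaussian peaks centred at positions 20 and 24:
        sig = [5 * math.exp(-(i - 20) ** 2 / 4) + 3 * math.exp(-(i - 24) ** 2 / 4)
               for i in range(60)]
        print(cwt_peaks(sig))  # ~ [20, 24]
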

2003

  • M. Bodirsky, C. Gröpl, and M. Kang, “Decomposing, counting, and generating unlabeled cubic planar graphs,” in European conference on combinatorics, graph theory, and applications eurocomb’03 prague, 2003.
    [Bibtex]
    @inproceedings{fu_mi_publications342,
    title = {Decomposing, Counting, and Generating Unlabeled Cubic Planar Graphs},
    booktitle = {European Conference on Combinatorics, Graph Theory, and Applications  EUROCOMB'03 Prague},
    author = {M. Bodirsky and C. Gr{\"o}pl and M. Kang},
    year = {2003},
    url = {http://publications.imp.fu-berlin.de/342/},
    keywords = {Cubic Planar Graph, Planar Graph, Cubic Graph, Enumeration, Random  graphs, Random Generation, Dynamic Programming, Graph Theory},
    abstract = {We present an expected polynomial time algorithm to generate an unlabeled  connected cubic planar graph uniformly at random. We first consider
    rooted cubic planar graphs, i.e., we count connected cubic planar
    graphs up to isomorphisms that fix a certain directed edge. Based
    on decompositions along the connectivity structure, we derive recurrence
    formulas for counting the exact number of rooted cubic planar graphs.
    This leads to 3-connected planar graphs, which have a unique embedding
    on the sphere; but special care has to be taken for rooted graphs
    that have a sense-reversing automorphism. Therefore we introduce
    the concept of colored networks, which stand in bijective correspondence
    to rooted graphs with given symmetries. Colored networks can again
    be decomposed along their connectivity structure. For rooted 3-connected
    cubic planar graphs embedded in the plane, we switch to the dual
    and count rooted triangulations. All these numbers can be evaluated
    in polynomial time by dynamic programming. We can use them to generate
    rooted cubic planar graphs uniformly at random. To generate connected
    cubic planar graphs without a root uniformly at random, we apply
    rejection sampling and obtain an expected polynomial time algorithm.}
    }
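    Both planar-graph papers in this section follow the same generic recipe: first count the objects exactly via recurrences evaluated by dynamic programming, then use the counts to draw uniformly at random. As a hedged illustration of that pattern only (the actual cubic-planar recurrences are far more involved), here is the recipe applied to a toy family, binary trees counted by the Catalan numbers:

        import random
        from functools import lru_cache

        @lru_cache(maxsize=None)
        def count(n: int) -> int:
            """Number of binary trees with n internal nodes (Catalan numbers)."""
            if n == 0:
                return 1
            return sum(count(k) * count(n - 1 - k) for k in range(n))

        def sample(n: int, rng: random.Random):
            """Draw a uniformly random binary tree with n internal nodes."""
            if n == 0:
                return None  # leaf
            r = rng.randrange(count(n))
            for k in range(n):  # left-subtree size, weighted by exact counts
                block = count(k) * count(n - 1 - k)
                if r < block:
                    return (sample(k, rng), sample(n - 1 - k, rng))
                r -= block

        rng = random.Random(42)
        print(count(10))       # 16796 trees with 10 internal nodes
        print(sample(4, rng))  # one of the 14 trees, each with probability 1/14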
  • M. Bodirsky, C. Gröpl, and M. Kang, “Generating labeled planar graphs uniformly at random,” in Proceedings of icalp 2003, 2003, pp. 1095-1107.
    [Bibtex]
    @inproceedings{fu_mi_publications341,
    booktitle = {Proceedings of ICALP 2003},
    note = {Appeared 2008 in Theoretical Computer Science. The download is the journal submission. From time to time we receive requests
    for source code, so here it is: see the files BGK03b.README and GBK03b.tar.bz2.},
    publisher = {Springer Verlag},
    year = {2003},
    author = {M. Bodirsky and C. Gr{\"o}pl and M. Kang},
    number = {2719},
    pages = {1095--1107},
    series = {Lecture Notes in Computer Science},
    title = {Generating labeled planar graphs uniformly at random},
    url = {http://publications.imp.fu-berlin.de/341/},
    keywords = {Planar Graph, Enumeration, Random graphs, Random Generation, Dynamic  Programming, Graph Theory},
    abstract = {We present an expected polynomial time algorithm to generate a labeled  planar graph uniformly at random. To generate the planar graphs,
    we derive recurrence formulas that count all such graphs with n vertices
    and m edges, based on a decomposition into 1-, 2-, and 3-connected
    components. For 3-connected graphs we apply a recent random generation
    algorithm by Schaeffer and a counting formula by Mullin and Schellenberg.}
    }
  • C. Frömmel, C. Gille, A. Goede, C. Gröpl, S. Hougardy, T. Nierhoff, R. Preissner, and M. Thimm, “Accelerating screening of 3d protein data with a graph theoretical approach,” Bioinformatics, vol. 19, iss. 18, pp. 2442-2447, 2003.
    [Bibtex]
    @article{fu_mi_publications346,
    journal = {Bioinformatics},
    number = {18},
    pages = {2442--2447},
    author = {C. Fr{\"o}mmel and Ch. Gille and A. Goede and C. Gr{\"o}pl and S. Hougardy and T. Nierhoff and R. Preissner and M. Thimm},
    year = {2003},
    title = {Accelerating screening of 3D protein data with a graph theoretical  approach},
    volume = {19},
    month = {December},
    abstract = {Motivation: The Dictionary of Interfaces in Proteins (DIP) is a database  collecting the 3D structure of interacting parts of proteins that
    are called patches. It serves as a repository, in which patches similar
    to given query patches can be found. The computation of the similarity
    of two patches is time consuming and traversing the entire DIP requires
    some hours. In this work we address the question of how the patches
    similar to a given query can be identified by scanning only a small
    part of DIP. The answer to this question requires the investigation
    of the distribution of the similarity of patches. Results: The score
    values describing the similarity of two patches can roughly be divided
    into three ranges that correspond to different levels of spatial
    similarity. Interestingly, the two iso-score lines separating the
    three classes can be determined by two different approaches. Applying
    a concept of the theory of random graphs reveals significant structural
    properties of the data in DIP. These can be used to accelerate scanning
    the DIP for patches similar to a given query. Searches for very similar
    patches could be accelerated by a factor of more than 25. Patches
    with a medium similarity could be found 10 times faster than by brute-force
    search.},
    doi = {10.1093/bioinformatics/btg343},
    keywords = {algorithm, proteomics, statistics},
    url = {http://publications.imp.fu-berlin.de/346/}
    }
  • C. Gröpl, Algorithmen in der bioinformatik, 2003.
    [Bibtex]
    @misc{fu_mi_publications348,
    author = {C. Gr{\"o}pl},
    year = {2003},
    title = {Algorithmen in der Bioinformatik},
    note = {Lecture notes (in German) for my course in the winter semester 2002/03 at the Institut f{\"u}r Informatik, Humboldt-Universit{\"a}t zu Berlin},
    url = {http://publications.imp.fu-berlin.de/348/}
    }

2002

  • E. Althaus, A. Caprara, H. -P. Lenhof, and K. Reinert, “Multiple sequence alignment with arbitrary gap costs: computing an optimal solution using polyhedral combinatorics,” in Proceedings of the 1st european conference on computational biology (eccb 2002), 2002, pp. 4-16.
    [Bibtex]
    @inproceedings{fu_mi_publications326,
    title = {Multiple Sequence alignment with arbitrary gap costs: Computing an  optimal solution using polyhedral combinatorics},
    booktitle = {Proceedings of the 1st European Conference on Computational Biology  (ECCB 2002)},
    author = {E. Althaus and A. Caprara and H.-P. Lenhof and K. Reinert},
    pages = {4--16},
    year = {2002},
    url = {http://publications.imp.fu-berlin.de/326/}
    }
  • J. A. Bailey, Z. Gu, R. A. Clark, K. Reinert, R. V. Samonte, S. S. Schwartz, M. D. Adams, E. W. Myers, P. Li, and E. E. Eichler, “Recent segmental duplications in the human genome,” Science, vol. 297, pp. 1003-1007, 2002.
    [Bibtex]
    @article{fu_mi_publications328,
    title = {Recent Segmental Duplications in the Human Genome},
    journal = {Science},
    volume = {297},
    author = {J. A. Bailey and Z. Gu and R. A. Clark and K. Reinert and R. V. Samonte and S. S. Schwartz and M. D. Adams and E. W. Myers and P. Li and E. E. Eichler},
    pages = {1003--1007},
    year = {2002},
    url = {http://publications.imp.fu-berlin.de/328/}
    }
  • C. Gröpl, S. Hougardy, T. Nierhoff, and H. -J. Prömel, “Steiner trees in uniformly quasi-bipartite graphs,” Information processing letters, vol. 83, pp. 195-200, 2002.
    [Bibtex]
    @article{fu_mi_publications354,
    volume = {83},
    journal = {Information Processing Letters},
    title = {Steiner trees in uniformly quasi-bipartite graphs},
    year = {2002},
    pages = {195--200},
    author = {C. Gr{\"o}pl and S. Hougardy and T. Nierhoff and H.-J. Pr{\"o}mel},
    abstract = {The area of approximation algorithms for the Steiner tree problem  in graphs has seen continuous progress over the last years. Currently
    the best approximation algorithm has a performance ratio of 1.550.
    This is still far away from 1.0074, the largest known lower bound
    on the achievable performance ratio. As all instances resulting from
    known lower bound reductions are uniformly quasi-bipartite, it is
    interesting whether this special case can be approximated better
    than the general case. We present an approximation algorithm with
    performance ratio 73/60 < 1.217 for the uniformly quasi-bipartite
    case. This improves on the previously known ratio of 1.279 of Robins
    and Zelikovsky. We use a new method of analysis that combines ideas
    from the greedy algorithm for set cover with a matroid-style exchange
    argument to model the connectivity constraint. As a consequence,
    we are able to provide a tight instance.},
    keywords = {Steiner trees, Graph algorithms, Approximation algorithms},
    url = {http://publications.imp.fu-berlin.de/354/}
    }
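    The 73/60 < 1.217 ratio above is for a specialised algorithm; as a point of reference, the classical metric-closure MST heuristic achieves ratio 2 on general instances and fits in a few lines. A minimal sketch follows; the graph, weights, and terminals are made up for the example, and the final comment shows the heuristic's gap against the optimal Steiner tree on this instance.

        import heapq

        def dijkstra(adj, src):
            """Single-source shortest-path distances in a weighted graph."""
            dist = {src: 0.0}
            heap = [(0.0, src)]
            while heap:
                d, u = heapq.heappop(heap)
                if d > dist.get(u, float("inf")):
                    continue
                for v, w in adj[u]:
                    if d + w < dist.get(v, float("inf")):
                        dist[v] = d + w
                        heapq.heappush(heap, (d + w, v))
            return dist

        def steiner_mst_heuristic(adj, terminals):
            """Cost of an MST of the metric closure on the terminals (ratio 2)."""
            dist = {t: dijkstra(adj, t) for t in terminals}
            in_tree, cost = {terminals[0]}, 0.0
            while len(in_tree) < len(terminals):  # Prim on the terminal graph
                w, t = min((dist[u][v], v) for u in in_tree
                           for v in terminals if v not in in_tree)
                in_tree.add(t)
                cost += w
            return cost

        adj = {  # undirected weighted graph as adjacency lists
            'a': [('x', 1), ('y', 4)], 'b': [('x', 1)], 'c': [('x', 1)],
            'x': [('a', 1), ('b', 1), ('c', 1)], 'y': [('a', 4)],
        }
        # Prints 4.0; the optimal Steiner tree (the star through x) costs 3.
        print(steiner_mst_heuristic(adj, ['a', 'b', 'c']))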
  • C. Gröpl, S. Hougardy, T. Nierhoff, H. -J. Prömel, and M. Thimm, Approximationsalgorithmen für das steinerbaumproblem in graphen, 2002.
    [Bibtex]
    @misc{fu_mi_publications356,
    title = {Approximationsalgorithmen f{\"u}r das Steinerbaumproblem in Graphen},
    author = {C. Gr{\"o}pl and S. Hougardy and T. Nierhoff and H.-J. Pr{\"o}mel and M. Thimm},
    year = {2002},
    url = {http://publications.imp.fu-berlin.de/356/}
    }
  • A. L. Halpern, D. H. Huson, and K. Reinert, “Segment match refinement and applications,” in Proceedings of the 2nd workshop on algorithms in bioinformatics (wabi-02), 2002, pp. 126-139.
    [Bibtex]
    @inproceedings{fu_mi_publications364,
    pages = {126--139},
    booktitle = {Proceedings of the 2nd Workshop on Algorithms in Bioinformatics (WABI-02)},
    author = {A. L. Halpern and D. H. Huson and K. Reinert},
    year = {2002},
    title = {Segment Match Refinement and Applications},
    url = {http://publications.imp.fu-berlin.de/364/}
    }
  • D. H. Huson, K. Reinert, and E. W. Myers, “The greedy path-merging algorithm for sequence assembly,” Journal of the acm, vol. 49, iss. 5, pp. 603-615, 2002.
    [Bibtex]
    @article{fu_mi_publications367,
    volume = {49},
    journal = {Journal of the ACM},
    title = {The Greedy Path-Merging Algorithm for Sequence Assembly},
    year = {2002},
    pages = {603--615},
    number = {5},
    author = {D. H. Huson and K. Reinert and E. W. Myers},
    url = {http://publications.imp.fu-berlin.de/367/}
    }
  • R. Mural, M. D. Adams, and K. Reinert, “A comparison of whole-genome shotgun-derived mouse chromosome 16 and the human genome,” Science, vol. 296, pp. 1661-1671, 2002.
    [Bibtex]
    @article{fu_mi_publications387,
    author = {Richard Mural and M. D. Adams and K. Reinert},
    pages = {1661--1671},
    year = {2002},
    title = {A Comparison of Whole-Genome Shotgun-Derived Mouse Chromosome 16  and the Human Genome},
    journal = {Science},
    volume = {296},
    abstract = {The high degree of similarity between the mouse and human genomes is demonstrated through analysis of the sequence of mouse chromosome 16 (Mmu 16), which was obtained as part of a whole-genome shotgun assembly of the mouse genome. The mouse genome is about 10\% smaller than the human genome, owing to a lower repetitive DNA content. Comparison of the structure and protein-coding potential of Mmu 16 with that of the homologous segments of the human genome identifies regions of conserved synteny with human chromosomes (Hsa) 3, 8, 12, 16, 21, and 22. Gene content and order are highly conserved between Mmu 16 and the syntenic blocks of the human genome. Of the 731 predicted genes on Mmu 16, 509 align with orthologs on the corresponding portions of the human genome, 44 are likely paralogous to these genes, and 164 genes have homologs elsewhere in the human genome; there are 14 genes for which we could find no human counterpart.},
    url = {http://publications.imp.fu-berlin.de/387/}
    }

2001

  • C. Gröpl, S. Hougardy, T. Nierhoff, and H. -J. Prömel, “Approximation algorithms for the steiner tree problem in graphs,” in Steiner trees in industry, X. Cheng and D. Z. Du, Eds., Kluwer Academic Publishers, 2001, pp. 235-279.
    [Bibtex]
    @incollection{fu_mi_publications353,
    title = {Approximation Algorithms for the Steiner Tree Problem in Graphs},
    author = {C. Gr{\"o}pl and S. Hougardy and T. Nierhoff and H.-J. Pr{\"o}mel},
    pages = {235--279},
    year = {2001},
    editor = {X. Cheng and D. Z. Du},
    note = {Survey article with new proofs},
    publisher = {Kluwer Academic Publishers},
    booktitle = {Steiner Trees in Industry},
    keywords = {Approximation algorithms, Combinatorial optimization, Graph algorithms,  Steiner trees},
    url = {http://publications.imp.fu-berlin.de/353/}
    }
  • C. Gröpl, S. Hougardy, T. Nierhoff, and H. -J. Prömel, “Lower bounds for approximation algorithms for the steiner tree problem,” in Proceedings of the 27th international workshop on graph-theoretic concepts in computer science (2001), 2001.
    [Bibtex]
    @inproceedings{fu_mi_publications355,
    author = {C. Gr{\"o}pl and S. Hougardy and T. Nierhoff and H.-J. Pr{\"o}mel},
    booktitle = {Proceedings of the 27th International Workshop on Graph-Theoretic  Concepts in Computer Science (2001)},
    year = {2001},
    publisher = {Springer Verlag},
    title = {Lower bounds for approximation algorithms for the Steiner tree  problem},
    series = {LNCS},
    url = {http://publications.imp.fu-berlin.de/355/},
    keywords = {Steiner trees, Approximation algorithms, Combinatorial optimization,  Graph algorithms, Hypergraphs, set systems, and designs},
    abstract = {The Steiner tree problem asks for a shortest subgraph connecting a  given set of terminals in a graph. It is known to be APX-complete,
    which means that no polynomial time approximation scheme can exist
    for this problem, unless P=NP. Currently, the best approximation
    algorithm for the Steiner tree problem has a performance ratio of
    1.55, whereas the corresponding lower bound is smaller than 1.01.
    In this paper, we provide for several Steiner tree approximation
    algorithms lower bounds on their performance ratio that are much
    larger. For two algorithms that solve the Steiner tree problem on
    quasi-bipartite instances, we even prove lower bounds that match
    the upper bounds. Quasi-bipartite instances are of special interest,
    as currently all known lower bound reductions for the Steiner tree
    problem in graphs produce such instances.}
    }
  • C. Gröpl, H. -J. Prömel, and A. Srivastav, “On the evolution of the worst-case obdd size,” Information processing letters, vol. 77, pp. 1-7, 2001.
    [Bibtex]
    @article{fu_mi_publications361,
    title = {On the Evolution of the Worst-Case OBDD Size},
    journal = {Information Processing Letters},
    volume = {77},
    author = {C. Gr{\"o}pl and H.-J. Pr{\"o}mel and A. Srivastav},
    pages = {1--7},
    year = {2001},
    abstract = {We prove matching lower and upper bounds on the worst-case OBDD size  of a Boolean function, revealing an interesting oscillating behaviour.},
    keywords = {Binary Decision Diagrams},
    url = {http://publications.imp.fu-berlin.de/361/}
    }
  • D. H. Huson, A. L. Halpern, Z. Lai, E. W. Myers, K. Reinert, and G. G. Sutton, “Comparing assemblies using fragments and mate-pairs,” in Proceedings of the 1st workshop on algorithms in bioinformatics (wabi-01), 2001, pp. 294-306.
    [Bibtex]
    @inproceedings{fu_mi_publications365,
    author = {D. H. Huson and A. L. Halpern and Z. Lai and E. W. Myers and K. Reinert and G. G. Sutton},
    booktitle = {Proceedings of the 1st Workshop on Algorithms in Bioinformatics (WABI-01)},
    pages = {294--306},
    year = {2001},
    title = {Comparing Assemblies using Fragments and Mate-pairs},
    url = {http://publications.imp.fu-berlin.de/365/}
    }
  • D. H. Huson, K. Reinert, S. A. Kravitz, K. A. Remington, A. L. Delcher, I. M. Dew, M. J. Flanigan, A. L. Halpern, Z. Lai, C. M. Mobarry, G. G. Sutton, and E. W. Myers, “Design of a compartmentalized shotgun assembler for the human genome,” in Proceedings of the ninth international conference on intelligent systems for molecular biology (ismb-01), 2001, pp. 132-139.
    [Bibtex]
    @inproceedings{fu_mi_publications366,
    year = {2001},
    author = {D. H. Huson and K. Reinert and S. A. Kravitz and K.  A. Remington and A. L. Delcher and I. M. Dew and M. J. Flanigan and A. L. Halpern and Z. Lai and C. M. Mobarry and G. G. Sutton and E. W. Myers},
    booktitle = {Proceedings of the Ninth International Conference on Intelligent  Systems for Molecular Biology (ISMB-01)},
    pages = {132--139},
    title = {Design of a compartmentalized Shotgun Assembler for the Human Genome},
    url = {http://publications.imp.fu-berlin.de/366/}
    }
  • D. H. Huson, K. Reinert, and E. W. Myers, “The greedy path-merging algorithm for sequence assembly,” in Proceedings of the fifth annual international conference on computational molecular biology (recomb-01), 2001, pp. 157-163.
    [Bibtex]
    @inproceedings{fu_mi_publications368,
    pages = {157--163},
    author = {D. H. Huson and K. Reinert and E. W. Myers},
    booktitle = {Proceedings of the Fifth Annual International Conference on Computational  Molecular Biology (RECOMB-01)},
    year = {2001},
    title = {The Greedy Path-Merging Algorithm for Sequence Assembly},
    url = {http://publications.imp.fu-berlin.de/368/}
    }
  • R. J. Turner, K. Chaturvedi, N. J. Edwards, A. L. Halpern, D. H. Huson, O. Kohlbacher, J. R. Miller, K. Reinert, K. A. Remington, R. Schwartz, B. Walenz, S. Yooseph, and S. Istrail, “Visualization challenges for a new cyberpharmaceutical computing,” in Ieee 2001 symposium on parallel and large-data visualization and graphics, 2001, pp. 7-18.
    [Bibtex]
    @inproceedings{fu_mi_publications410,
    title = {Visualization Challenges for a New Cyberpharmaceutical Computing},
    note = {Keynote address},
    booktitle = {IEEE 2001 Symposium on Parallel and Large-Data Visualization and  Graphics},
    author = {R. J. Turner and K. Chaturvedi and N. J. Edwards and A. L. Halpern and D. H. Huson and O. Kohlbacher and J. R. Miller and K. Reinert and K. A. Remington and R. Schwartz and B. Walenz and S. Yooseph and S. Istrail},
    pages = {7--18},
    year = {2001},
    url = {http://publications.imp.fu-berlin.de/410/}
    }
  • J. C. Venter, M. D. Adams, E. W. Myers, K. Reinert, et al., “The sequence of the human genome,” Science, vol. 291, iss. 5507, pp. 1304-1351, 2001.
    [Bibtex]
    @article{fu_mi_publications411,
    year = {2001},
    pages = {1304--1351},
    number = {5507},
    author = {J. C. Venter and M. D. Adams and E. W. Myers and K. Reinert and others},
    volume = {291},
    journal = {Science},
    title = {The Sequence of the Human Genome},
    url = {http://publications.imp.fu-berlin.de/411/},
    keywords = {ASSEMBLY},
    abstract = {A 2.91-billion base pair (bp) consensus sequence of the euchromatic portion of the human genome was generated by the whole-genome shotgun sequencing method. The 14.8-billion bp DNA sequence was generated over 9 months from 27,271,853 high-quality sequence reads (5.11-fold coverage of the genome) from both ends of plasmid clones made from the DNA of five individuals. Two assembly strategies{--}a whole-genome assembly and a regional chromosome assembly{--}were used, each combining sequence data from Celera and the publicly funded genome effort. The public data were shredded into 550-bp segments to create a 2.9-fold coverage of those genome regions that had been sequenced, without including biases inherent in the cloning and assembly procedure used by the publicly funded group. This brought the effective coverage in the assemblies to eightfold, reducing the number and size of gaps in the final assembly over what would be obtained with 5.11-fold coverage. The two assembly strategies yielded very similar results that largely agree with independent mapping data. The assemblies effectively cover the euchromatic regions of the human chromosomes. More than 90\% of the genome is in scaffold assemblies of 100,000 bp or more, and 25\% of the genome is in scaffolds of 10 million bp or larger. Analysis of the genome sequence revealed 26,588 protein-encoding transcripts for which there was strong corroborating evidence and an additional}
    }
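    The coverage figures quoted in this abstract follow from simple arithmetic (coverage = total sequenced bases / genome length). A quick recomputation using only the numbers quoted above; the small deviation from the stated 5.11-fold is rounding in the published totals:

        reads = 27_271_853          # high-quality sequence reads
        total_bp = 14.8e9           # total sequenced base pairs
        genome_bp = 2.91e9          # euchromatic consensus length

        avg_read_len = total_bp / reads   # ~543 bp per read
        coverage = total_bp / genome_bp   # ~5.09x vs. the quoted 5.11x
        print(f"{avg_read_len:.0f} bp/read, {coverage:.2f}x coverage")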

2000

  • “The genome sequence of drosophila melanogaster,” Science, vol. 287, iss. 5461, pp. 2185-2195, 2000.
    [Bibtex]
    @article{fu_mi_publications324,
    year = {2000},
    pages = {2185--2195},
    number = {5461},
    volume = {287},
    journal = {Science},
    title = {The Genome Sequence of Drosophila melanogaster},
    url = {http://publications.imp.fu-berlin.de/324/},
    keywords = {ASSEMBLY}
    }
  • G. Baudis, C. Gröpl, S. Hougardy, T. Nierhoff, and H. -J. Prömel, “Approximating minimum spanning sets in hypergraphs and polymatroids,” Humboldt-University Berlin, Technical Report, 2000.
    [Bibtex]
    @techreport{fu_mi_publications329,
    year = {2000},
    type = {Technical Report},
    author = {G. Baudis and C. Gr{\"o}pl and S. Hougardy and T. Nierhoff and H.-J. Pr{\"o}mel},
    institution = {Humboldt-University Berlin},
    note = {This paper was already accepted for ICALP 2000 but we did not present  it since later we were informed that the main result had already
    been proven in a different way.},
    title = {Approximating Minimum Spanning Sets in Hypergraphs and Polymatroids},
    url = {http://publications.imp.fu-berlin.de/329/},
    keywords = {Hypergraphs, set systems, and designs, Steiner trees, Approximation  algorithms, Colouring, packing and covering, Combinatorial optimization,
    Graph algorithms}
    }
  • J. D. Kececioglu, H. -P. Lenhof, K. Mehlhorn, K. Reinert, and M. Vingron, “A polyhedral approach to sequence alignment problems,” Discrete applied mathematics, vol. 104, pp. 143-186, 2000.
    [Bibtex]
    @article{fu_mi_publications370,
    volume = {104},
    title = {A Polyhedral Approach to Sequence Alignment Problems},
    journal = {Discrete Applied Mathematics},
    year = {2000},
    author = {J. D. Kececioglu and H.-P. Lenhof and K. Mehlhorn and K. Reinert and M. Vingron},
    pages = {143--186},
    url = {http://publications.imp.fu-berlin.de/370/}
    }
  • M. Lermen and K. Reinert, “The practical use of the a* algorithm for exact multiple sequence alignment,” Journal of computational biology, pp. 655-671, 2000.
    [Bibtex]
    @article{fu_mi_publications383,
    journal = {Journal of Computational Biology},
    title = {The Practical Use of the A* Algorithm for Exact Multiple Sequence  Alignment},
    year = {2000},
    pages = {655--671},
    author = {M. Lermen and K. Reinert},
    url = {http://publications.imp.fu-berlin.de/383/}
    }
  • E. W. Myers, G. G. Sutton, A. L. Delcher, I. M. Dew, M. J. Flanigan, S. A. Kravitz, C. M. Mobarry, K. Reinert, K. A. Remington, E. L. Anson, R. Bolanos, H. -H. Chou, C. M. Jordan, A. L. Halpern, S. Lonardi, E. M. Beasley, R. C. Brandon, L. Chen, P. J. Dunn, Z. Lai, Y. Liang, D. R. Nusskern, M. Zhan, Q. Zhang, X. H. Zheng, G. M. Rubin, M. D. Adams, and J. C. Venter, “A whole-genome assembly of drosophila,” Science, vol. 287, iss. 5461, pp. 2196-2203, 2000.
    [Bibtex]
    @article{fu_mi_publications388,
    volume = {287},
    title = {A Whole-Genome Assembly of Drosophila},
    journal = {Science},
    year = {2000},
    author = {E. W. Myers and G. G. Sutton and A. L. Delcher and I. M. Dew and M. J. Flanigan and S. A. Kravitz and C. M. Mobarry and K. Reinert and K. A. Remington and E. L. Anson and R. Bolanos and H.-H. Chou and C. M. Jordan and A. L. Halpern and S. Lonardi and E. M. Beasley and R. C. Brandon and L. Chen and P. J. Dunn and Z. Lai and Y. Liang and D. R. Nusskern and M. Zhan and Q. Zhang and X. H. Zheng and G. M. Rubin and M. D. Adams and J. C. Venter},
    pages = {2196--2203},
    number = {5461},
    url = {http://publications.imp.fu-berlin.de/388/},
    keywords = {ASSEMBLY}
    }
  • K. Reinert, J. Stoye, and T. Will, “An iterative method for faster sum-of-pairs multiple sequence alignment,” Bioinformatics, vol. 16, iss. 9, pp. 808-814, 2000.
    [Bibtex]
    @article{fu_mi_publications400,
    author = {K. Reinert and J. Stoye and T. Will},
    number = {9},
    pages = {808--814},
    year = {2000},
    title = {An Iterative Method for Faster Sum-of-Pairs Multiple Sequence Alignment},
    journal = {Bioinformatics},
    volume = {16},
    url = {http://publications.imp.fu-berlin.de/400/}
    }

1999

  • C. Gröpl, Binary decision diagrams for random boolean functions, 1999.
    [Bibtex]
    @unpublished{fu_mi_publications349,
    school = {Humboldt-Universit{\"a}t zu Berlin},
    title = {Binary Decision Diagrams for Random Boolean Functions},
    year = {1999},
    author = {C. Gr{\"o}pl},
    url = {http://publications.imp.fu-berlin.de/349/},
    keywords = {Binary decision diagram, Boolean function, probabilistic analysis,  Shannon effect},
    abstract = {Binary Decision Diagrams (BDDs) are a data structure for Boolean functions, also
    known as branching programs. In ordered binary decision diagrams
    (OBDDs), the tests have to obey a fixed variable ordering. In free
    binary decision diagrams (FBDDs), each variable may be tested at
    most once. The efficiency of new variants of the BDD concept is usually
    demonstrated with spectacular (worst-case) examples. We pursue another
    approach and compare the representation sizes of almost all Boolean
    functions. Whereas I. Wegener proved that for `most' values of n
    the expected OBDD size of a random Boolean function of n variables
    is equal to the worst-case size up to terms of lower order, we show
    that this is not the case for n within intervals of constant length
    around the values n = 2^h + h. Furthermore, ranges of n exist for
    which minimal FBDDs are almost always at least a constant factor
    smaller than minimal OBDDs. Our main theorems have doubly exponentially
    small probability bounds (in n). We also investigate the evolution
    of random OBDDs and their worst-case size, revealing an oscillating
    behaviour that explains why certain results cannot be improved in
    general.}
    }
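    The OBDD sizes studied in this thesis can be explored numerically via a classical characterization: for a fixed variable order, the number of nodes labelled x_{i+1} in the minimal OBDD equals the number of distinct subfunctions, obtained by fixing x_1..x_i, that essentially depend on x_{i+1}. A small sketch under that characterization (exponential in n by design, for small experiments only):

        from itertools import product

        def obdd_level_sizes(truth_table, n):
            """Level sizes of the minimal OBDD for the order x1 < ... < xn.

            sizes[i] counts distinct truth-table slices after fixing x1..xi
            whose two halves differ, i.e. subfunctions depending on x_{i+1}.
            """
            sizes = []
            for i in range(n):
                width = 2 ** (n - i)
                slices = {tuple(truth_table[j:j + width])
                          for j in range(0, 2 ** n, width)}
                sizes.append(sum(1 for s in slices
                                 if s[:width // 2] != s[width // 2:]))
            return sizes

        # Example: majority of three variables has level sizes [1, 2, 1].
        maj = [int(a + b + c >= 2) for a, b, c in product((0, 1), repeat=3)]
        print(obdd_level_sizes(maj, 3))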
  • H. -P. Lenhof, B. Morgenstern, and K. Reinert, “An exact solution for the segment-to-segment multiple sequence alignment problem,” Bioinformatics, vol. 15, iss. 3, pp. 203-210, 1999.
    [Bibtex]
    @article{fu_mi_publications381,
    title = {An exact solution for the segment-to-segment multiple sequence alignment  problem},
    journal = {Bioinformatics},
    volume = {15},
    author = {H.-P. Lenhof and B. Morgenstern and K. Reinert},
    pages = {203--210},
    number = {3},
    year = {1999},
    url = {http://publications.imp.fu-berlin.de/381/}
    }

1998

  • C. Gröpl, H. -J. Prömel, and A. Srivastav, “Size and structure of random ordered binary decision diagrams (extended abstract),” in Stacs 98, Berlin, Heidelberg, New York, 1998, pp. 238-248.
    [Bibtex]
    @inproceedings{fu_mi_publications359,
    publisher = {Springer Verlag},
    editor = {D. Krob and Ch. Meinel and M. Morvan},
    booktitle = {STACS 98},
    series = {Lecture Notes in Computer Science},
    title = {Size and Structure of Random Ordered Binary Decision Diagrams (Extended  Abstract)},
    year = {1998},
    address = {Berlin, Heidelberg, New York},
    number = {1373},
    pages = {238--248},
    author = {C. Gr{\"o}pl and H.-J. Pr{\"o}mel and A. Srivastav},
    keywords = {VLSI-Design and layout, Hardware verification, Random graphs},
    url = {http://publications.imp.fu-berlin.de/359/}
    }
  • C. Gröpl and M. Skutella, “Parallel repetition of mip(2,1) systems,” in Lectures on proof verification and approximation algorithms, E. W. Mayr, H. -J. Prömel, and A. Steger, Eds., Springer, 1998, vol. 1367, pp. 161-177.
    [Bibtex]
    @incollection{fu_mi_publications363,
    publisher = {Springer},
    note = {The book grew out of a Dagstuhl Seminar, April 21-25, 1997},
    editor = {E. W. Mayr and H.-J. Pr{\"o}mel and A. Steger},
    booktitle = {Lectures on Proof Verification and Approximation Algorithms},
    title = {Parallel Repetition of MIP(2,1) Systems},
    series = {Lecture Notes in Computer Science},
    volume = {1367},
    pages = {161--177},
    author = {C. Gr{\"o}pl and M. Skutella},
    year = {1998},
    keywords = {Theoretical computer science (other), Approximation algorithms, PCP  and non-approximability},
    url = {http://publications.imp.fu-berlin.de/363/}
    }
  • H. -P. Lenhof, K. Reinert, and M. Vingron, “A polyhedral approach to rna sequence structure alignment,” Journal of computational biology, vol. 5, iss. 3, pp. 517-530, 1998.
    [Bibtex]
    @article{fu_mi_publications382,
    year = {1998},
    author = {H.-P. Lenhof and K. Reinert and M. Vingron},
    pages = {517--530},
    number = {3},
    volume = {5},
    title = {A Polyhedral Approach to RNA Sequence Structure Alignment},
    journal = {Journal of Computational Biology},
    url = {http://publications.imp.fu-berlin.de/382/}
    }
  • H. -P. Lenhof, K. Reinert, and M. Vingron, “A polyhedral approach to rna sequence structure alignment,” in Proceedings of the second annual international conference on computational molecular biology (recomb-98), 1998, pp. 153-162.
    [Bibtex]
    @inproceedings{fu_mi_publications380,
    pages = {153--162},
    booktitle = {Proceedings of the Second Annual International Conference on Computational  Molecular Biology (RECOMB-98)},
    author = {H.-P. Lenhof and K. Reinert and M. Vingron},
    year = {1998},
    title = {A polyhedral approach to RNA sequence structure alignment},
    url = {http://publications.imp.fu-berlin.de/380/}
    }

1997

  • M. Block, C. Gröpl, H. Preuss, H. J. Prömel, and A. Srivastav, “Efficient ordering of state variables and transition relation partitions in symbolic model checking,” Humboldt-Universität zu Berlin, Technical Report, 1997.
    [Bibtex]
    @techreport{fu_mi_publications335,
    title = {Efficient ordering of state variables and transition relation partitions  in symbolic model checking},
    institution = {Humboldt-Universit{\"a}t zu Berlin},
    author = {M. Block and C. Gr{\"o}pl and H. Preuss and H. J. Pr{\"o}mel and A. Srivastav},
    type = {Technical Report},
    year = {1997},
    url = {http://publications.imp.fu-berlin.de/335/},
    keywords = {Randomized algorithms and probabilistic analysis, VLSI-Design and  layout, Binary Decision Diagrams, Hardware verification, Local search
    and metaheuristics}
    }
  • K. Reinert, H. -P. Lenhof, P. Mutzel, and J. D. Kececioglu, “A branch-and-cut algorithm for multiple sequence alignment,” in Proceedings of the first annual international conference on computational molecular biology (recomb-97), 1997, pp. 241-249.
    [Bibtex]
    @inproceedings{fu_mi_publications399,
    year = {1997},
    pages = {241--249},
    author = {K. Reinert and H.-P. Lenhof and P. Mutzel and J. D. Kececioglu},
    booktitle = {Proceedings of the First Annual International Conference on Computational  Molecular Biology (RECOMB-97)},
    title = {A Branch-and-Cut Algorithm for Multiple Sequence Alignment},
    url = {http://publications.imp.fu-berlin.de/399/}
    }

1996

  • P. G. Bradford and K. Reinert, “Lower bounds for row minima searching,” in Proceedings of the 23rd international colloquium on automata, languages, and programming 1996 (icalp-96), lncs 1099, 1996, pp. 454-465.
    [Bibtex]
    @inproceedings{fu_mi_publications343,
    booktitle = {Proceedings of the 23rd International Colloquium on Automata, Languages,  and Programming 1996 (ICALP-96), LNCS 1099},
    author = {P. G. Bradford and K. Reinert},
    pages = {454--465},
    year = {1996},
    title = {Lower Bounds for Row Minima Searching},
    url = {http://publications.imp.fu-berlin.de/343/}
    }
  • C. Gröpl, Über approximationsalgorithmen zur färbung k-färbbarer graphen, die vektorchromatische zahl und andere varianten der vartheta-funktion, Forschungsinstitut für Diskrete Mathematik, 1996.
    [Bibtex]
    @unpublished{fu_mi_publications350,
    author = {C. Gr{\"o}pl},
    address = {Forschungsinstitut f{\"u}r Diskrete Mathematik},
    year = {1996},
    title = {{\"U}ber Approximationsalgorithmen zur F{\"a}rbung k-f{\"a}rbbarer  Graphen, die vektorchromatische Zahl und andere Varianten der vartheta-Funktion},
    month = {January},
    school = {Rheinische Friedrich-Wilhelms-Universit{\"a}t Bonn},
    url = {http://publications.imp.fu-berlin.de/350/},
    keywords = {Approximation algorithms, Colouring, packing and covering}
    }
