Challenge 8: The Biologist Problem

_________________________________________________________________________
You are a biologist examining DNA sequences of different life forms. You are given two DNA sequences, and the goal is to find the largest ordered set of adjacent bases that are the same in both DNA sequences.

DNA sequences are given as ordered sets of nucleotide bases: adenine (abbreviated a), cytosine (c), guanine (g) and thymine (t).

ATGTCTTCCTCGA
TGCTTCCTATGAC

Figure 1. DNA strands

For the example above, the result is CTTCCT because that is the largest ordered set of adjacent bases that can be found in both life forms.

Input and output are received from the standard input and output streams.

Input: Two strings of DNA nucleotide bases, each representing one genome sequence.

Output: A single string – the longest sequence of adjacent nucleotide bases that is present in both life forms.

Input sample

ctgactga actgagc cgtaattgcgat cgtacagtagc ctgggccttgaggaaaactg gtaccagtactgatagt

Output sample

actga cgta actg

TEST

aatttcggtcgacgtccgtgcctttgcccgggaagactaatagggccgttgtgtgaggggc actcacctacatgaagtcgaccttaggttgtagatgtagtaacggaatgaatccgatcggtgt
cataccccatccacaaagtctgcaggaacagctcgg attaagacttcgaaacgatcagggtatctctcaggagcttcatcatacctctaccgtgc
tagctgtccgtgacgagctcccacagatgtcgtgtgaaacgg atgtgatatttttgcaactcatgatcactcgccgcaggacgctgcaatggtattaaccgggcccagcccgacgtaaaacccaaatctaaat
agtggcaggaggtgttcgtcactccggtacggtcatacgtgagcgccagcaacaaagctgcacatacctaacgcgttgcgacac accatttccaacgtcgatacaccaggtatgtccgagtaatggccagcacgcttaagccgtatagcaatgcactttcgcccg
gagctatgtgttctctgttcgtccgt cattctccgactttttaacatgttatggcacaggatatcagg
ggcttaatacggagaattgtcaaatgatgacatcgctagtctcatgcgccggggtgcttattggcaacatttcacggtatatgaggcgtatatctaa tccgcccgtttaatg
gagtctaattgtgagcgacggtctcgactgagaggagctgatgggaaggcatgctcgtcttatggaggagg tccggaggtcaagcatcgctggggttgggtttgcaccatataggtgcagtagacgagactgaggttatga
tcaccattgga agatgataaggtcggtcgcaccaatgcaaacctctataggcaatatgcgtacggcgagcacagtagtggggattagggtagattcgtacactacgatcc
ctcccctcatagcaacgaagt gtccgaacca
agctggggcaactcatgccagttgtattagcgatatggtcccaagaacagacgtacactacg gtggagggtcccaatgatactgctc

_________________________________________________________________________
UserInterface Class:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.StringTokenizer;

/**
 * Drives the program: reads pairs of DNA sequences from standard input, one
 * pair per line, until end of input, and prints the longest common run of
 * adjacent bases for each pair to standard output.
 * @author Javier de Pedro López
 */
public class UserInterface {

    /**
     * Runs the read/analyze/print loop until end of input.
     *
     * <p>Each input line is expected to hold two whitespace-separated
     * sequences. Blank lines are skipped silently. A line with only one
     * sequence is reported on standard error and skipped, so a single
     * malformed line does not abort the whole run. Tokens after the second
     * one are ignored.
     *
     * @throws IOException if reading from standard input fails.
     */
    public void run() throws IOException {
        // The reader wraps System.in, which this class does not own, so it is
        // intentionally not closed here.
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));

        String line;
        // readLine() returns null at end of input.
        while ((line = reader.readLine()) != null) {
            // Default delimiters cover spaces, tabs and line terminators.
            StringTokenizer tokens = new StringTokenizer(line);
            int tokenCount = tokens.countTokens();
            if (tokenCount == 0) {
                continue;
            }
            if (tokenCount < 2) {
                System.err.println("Skipping malformed line (expected two sequences): " + line);
                continue;
            }
            String cad1 = tokens.nextToken();
            String cad2 = tokens.nextToken();
            AnalizerDNA analizer = new AnalizerDNA(cad1, cad2);
            System.out.println(analizer.getMaxCoincidence());
        }
    }
}

AnalizerDNA Class:

/**
 * Finds the longest run of adjacent bases that two DNA sequences have in
 * common (the longest common substring of the two strings).
 * @author Javier de Pedro López
 */
public class AnalizerDNA {

    // FIELDS //
    /** Longest common run found at construction time; empty if there is none. */
    private final String maxDnaCoincidence;

    // CONSTRUCTOR //
    /**
     * Creates an analyzer and computes the longest common run of the two
     * sequences. When several runs share the maximum length, the one that
     * starts earliest in {@code cad1} is kept.
     * @param cad1 first sequence to compare.
     * @param cad2 second sequence to compare.
     * @throws NullPointerException if {@code cad1} is null, or if
     *         {@code cad2} is null while {@code cad1} is non-empty.
     */
    public AnalizerDNA(String cad1, String cad2) {
        maxDnaCoincidence = longestCommonRun(cad1, cad2);
    }

    // METHODS //
    /**
     * Returns the longest run of adjacent bases shared by the two sequences.
     * @return the longest common run, or an empty string if the sequences
     *         share no base.
     */
    public String getMaxCoincidence() {
        return maxDnaCoincidence;
    }

    /**
     * Computes the longest common substring with a row-by-row dynamic
     * programme: for each pair of positions it tracks the length of the
     * common run ending there, in O(n * m) time and O(m) extra space.
     */
    private static String longestCommonRun(String first, String second) {
        if (first.isEmpty()) {
            // Nothing can match; also avoids touching the second sequence.
            return "";
        }

        // previousRow[j] / currentRow[j]: length of the common run ending at
        // the previous / current character of `first` and character j-1 of
        // `second`. Index 0 stays 0 and acts as the "no run yet" border.
        int[] previousRow = new int[second.length() + 1];
        int[] currentRow = new int[second.length() + 1];

        int bestLength = 0;
        int bestEnd = 0; // exclusive end index of the best run inside `first`

        for (int i = 1; i <= first.length(); i++) {
            char base = first.charAt(i - 1);
            for (int j = 1; j <= second.length(); j++) {
                if (base == second.charAt(j - 1)) {
                    currentRow[j] = previousRow[j - 1] + 1;
                    // Strict comparison keeps the earliest run in `first`
                    // when several runs have the same length.
                    if (currentRow[j] > bestLength) {
                        bestLength = currentRow[j];
                        bestEnd = i;
                    }
                } else {
                    currentRow[j] = 0;
                }
            }
            // Reuse the two rows instead of allocating a new one per step;
            // every cell from index 1 on is overwritten in the next pass.
            int[] swap = previousRow;
            previousRow = currentRow;
            currentRow = swap;
        }

        return first.substring(bestEnd - bestLength, bestEnd);
    }
}

$- Leave your comment

Fill in your details below or click an icon to log in:

Logo de WordPress.com

You are commenting using your WordPress.com account. Log Out / Cambiar )

Twitter picture

You are commenting using your Twitter account. Log Out / Cambiar )

Facebook photo

You are commenting using your Facebook account. Log Out / Cambiar )

Connecting to %s