1 files changed, 383 insertions, 0 deletions
diff --git a/sci-biology/meme/files/meme-4.11.2_p2-patch1.patch b/sci-biology/meme/files/meme-4.11.2_p2-patch1.patch
new file mode 100644
index 000000000000..30a182f8f43a
--- /dev/null
+++ b/sci-biology/meme/files/meme-4.11.2_p2-patch1.patch
@@ -0,0 +1,383 @@
+--- a/doc/alphabet-format.html
++++ b/doc/alphabet-format.html
+@@ -233,7 +233,7 @@
+           providing a reference on the meaning of the symbols used. If present, the
+           symbol name must be the second field.</p>
+           <p>The &quot;<span class="pdat">name</span>&quot; follows the rules of
+-          <a href="qstr">quoted text</a>.</p>
++          <a href="#qstr">quoted text</a>.</p>
+         </div>
+         <h5>color</h5>
+         <div class="indent">
+--- a/doc/release-notes.html
++++ b/doc/release-notes.html
+@@ -14,8 +14,26 @@
+       <h2>Motif-based sequence analysis tools</h2>
+     </div>
+     <h2>MEME Suite Release Notes</h2>
++    <hr>
++      <b>MEME version 4.11.2 patch 1 -- June 16, 2016</b>
++      <ul>
++        <li>
++            <b>Bug fixes</b>
++            <ul>
++              <li>
++              Fixed bug in MCAST 4.11.2 that caused it to prematurely truncate
++              reading the sequence file.
++              </li>
++              <li>
++              Modified MEME to fall back to a simple Dirichlet prior when
++              using DNA or a custom alphabet with a prior that requires
++              a prior library, but no prior library is specified.
++              </li>
++            </ul>
++        </li>
++      </ul>
++      <p>
+       <hr>
+-      <p>
+         <b>MEME version 4.11.2 -- May 5 2016</b>
+       </p>
+       <ul>
+--- a/src/fasta-io.c
++++ b/src/fasta-io.c
+@@ -14,6 +14,7 @@
+ #include "alphabet.h"
+ #include "fasta-io.h"
+ #include "io.h"
++#include "seq-reader-from-fasta.h"
+ #include "prior-reader-from-psp.h"
+ #include "seq.h"
+ 
+@@ -159,61 +160,6 @@
+ }
+ 
+ /****************************************************************************
+- * Read raw sequence until a new sequence is encountered or too many letters
+- * are read.  The new sequence is appended to the end of the given
+- * sequence.
+- *
+- * Return: Was the sequence read completely?
+- ****************************************************************************/
+-static BOOLEAN_T read_raw_sequence_from_reader(
+-   DATA_BLOCK_READER_T *fasta_reader, // Sequence source
+-   char* name, // Sequence ID (used in error messages).
+-   ALPH_T* alph, // Alphabet in use
+-   unsigned int offset, // Current position in raw_sequence.
+-   unsigned int max_chars, // Maximum chars in raw_sequence.
+-   char* raw_sequence // Pre-allocated sequence.
+-) {
+-  // tlb; change a_char to integer so it will compile on SGI
+-  int a_char;
+-  int start_update;
+-  BOOLEAN_T return_value = TRUE;
+-
+-  // Start at the end of the given sequence.
+-  assert(offset < max_chars);
+-
+-  DATA_BLOCK_T *seq_block = new_sequence_block(max_chars - offset);
+-  return_value = !fasta_reader->get_next_block(fasta_reader, seq_block);
+-
+-  char *seq_buffer = get_sequence_from_data_block(seq_block);
+-  size_t seq_buffer_size = get_num_read_into_data_block(seq_block);
+-  int i;
+-  for (i = 0; i < seq_buffer_size; ++i) {
+-    a_char = seq_buffer[i];
+-    // Skip non-alphabetic characters.
+-    if (!isalnum(a_char) && a_char != '-' && a_char != '*' && a_char != '.') {
+-      if ((a_char != ' ') && (a_char != '\t') && (a_char != '\n') && (a_char != '\r')) {
+-        fprintf(stderr, "Warning: Skipping character %c in sequence %s.\n",
+-                a_char, name);
+-      }
+-    } else {
+-      // skip check if unknown alph
+-      if (alph != NULL && !alph_is_known(alph, a_char)) {
+-        fprintf(stderr, "Warning: Converting illegal character %c to %c ",
+-                a_char, alph_wildcard(alph));
+-        fprintf(stderr, "in sequence %s.\n", name);
+-        a_char = alph_wildcard(alph);
+-      }
+-      raw_sequence[offset] = (char) a_char;
+-      ++offset;
+-    }
+-  }
+-
+-  raw_sequence[offset] = '\0';
+-  free_data_block(seq_block);
+-  return(return_value);
+-}
+-
+-/****************************************************************************
+  * Read one sequence from a file in Fasta format.
+  *
+  * Return: Was a sequence successfully read?
+@@ -320,44 +266,6 @@
+ }
+ 
+ /****************************************************************************
+- * Read up to max_chars letters of one sequence from a DATA_BLOCK_T readder
+- * and copy them in to the raw sequence in the SEQ_T object starting at the
+- * given buffer offset. 
+- ****************************************************************************/
+-void read_one_fasta_segment_from_reader(
+-   DATA_BLOCK_READER_T *fasta_reader,
+-   size_t max_size,
+-   size_t buffer_offset,
+-   SEQ_T *sequence
+-) {
+-
+-  assert(sequence != NULL);
+-  assert(get_seq_length(sequence) <= max_size);
+-
+-  // Get the raw sequence buffer from the SEQ_T
+-  char *raw_sequence = get_raw_sequence(sequence);
+-  if (raw_sequence == NULL) {
+-    // Allocate space for raw sequence if not done yet.
+-    raw_sequence = mm_malloc(sizeof(char) * max_size + 1);
+-    raw_sequence[0] = 0;
+-  }
+-
+-  // Read a block of sequence charaters into the
+-  // raw sequence buffer for the SEQ_T.
+-  char *name = get_seq_name(sequence);
+-  BOOLEAN_T is_complete = read_raw_sequence_from_reader(
+-    fasta_reader,
+-    name,
+-    NULL, //FIXME this is dodgy, need a proper way of getting the alphabet. The fasta_reader has it but it is not accessable!
+-    buffer_offset,
+-    max_size,
+-    raw_sequence
+-  );
+-  set_raw_sequence(raw_sequence, is_complete, sequence);
+-
+-}
+-
+-/****************************************************************************
+  * Read all the sequences from a FASTA file at once.
+    Multiple files can be appended by calling this more than once.
+  ****************************************************************************/
+--- a/src/fasta-io.h
++++ b/src/fasta-io.h
+@@ -43,19 +43,6 @@
+ );
+ 
+ /****************************************************************************
+- * Read up to max_chars letters of one sequence from a DATA_BLOCK_T readder
+- * and copy them in to the raw sequence in the SEQ_T object starting at the
+- * given buffer offset. 
+- ****************************************************************************/
+-void read_one_fasta_segment_from_reader(
+-  DATA_BLOCK_READER_T *fasta_reader,
+-  size_t max_size,
+-  size_t buffer_offset,
+-  SEQ_T* sequence
+-);
+-
+-
+-/****************************************************************************
+  * Read all the sequences from a file in Fasta format.
+  ****************************************************************************/
+ void read_many_fastas
+--- a/src/init.c
++++ b/src/init.c
+@@ -767,10 +767,16 @@
+       if (alph_is_builtin_protein(alph)) { // default mixture prior for proteins
+         plib_name = make_path_to_file(get_meme_etc_dir(), PROTEIN_PLIB);
+       } else {
+-        fprintf(stderr, "The prior library must be specified for DNA or custom "
+-            "alphabets when specifiying a prior type of 'dmix', 'mega' "
+-            "or 'megap'.");
+-        exit(1);
++        fprintf(
++          stderr, 
++          "WARNING: When using DNA or a custom alphabet, "
++          "and specifiying a prior type of\n"
++          "'dmix', 'mega' or 'megap', a prior library must be provided.\n"
++          "No prior library was provided, so a simple Dirichlet prior will be used.\n"
++        );
++        prior = "dirichlet";
++        ptype = Dirichlet;
++        if (beta <= 0) beta = 0.01; // default b = 0.01 for simple Dirichlet
+       }
+     }
+   }
+--- a/src/seq-reader-from-fasta.c
++++ b/src/seq-reader-from-fasta.c
+@@ -639,11 +639,140 @@
+   return fasta_reader->current_position;
+ }
+ 
++
++/****************************************************************************
++ * Read up to (max_size - offset) letters of one sequence from a
++ * DATA_BLOCK_READER_T and store them in the raw sequence buffer of the
++ * SEQ_T object, starting at the given buffer offset.
++ ****************************************************************************/
++void read_one_fasta_segment_from_reader(
++   DATA_BLOCK_READER_T *fasta_reader, // Sequence source
++   size_t max_size, // Capacity of the raw sequence buffer, in letters
++   size_t offset, // Index in the buffer at which writing starts
++   SEQ_T *sequence // Sequence object that receives the letters
++) {
++
++  // Require a sequence object and at least one free slot past offset.
++  assert(sequence != NULL);
++  assert(offset < max_size);
++
++  // Get the raw sequence buffer from the SEQ_T
++  char *raw_sequence = get_raw_sequence(sequence);
++  if (raw_sequence == NULL) {
++    // Allocate max_size letters plus one byte for the string terminator.
++    raw_sequence = mm_malloc(sizeof(char) * max_size + 1);
++    raw_sequence[0] = 0;
++  }
++
++  // Read a block of sequence characters into the
++  // raw sequence buffer for the SEQ_T, starting at offset.
++  BOOLEAN_T is_complete = read_raw_sequence_from_reader(
++    fasta_reader,
++    max_size - offset,
++    raw_sequence + offset
++  );
++  set_raw_sequence(raw_sequence, is_complete, sequence);
++}
++
++/****************************************************************************
++ * Read raw sequence letters from the FASTA file into raw_sequence until the
++ * next sequence header, end of file, or max_chars letters. White space is
++ * skipped; letters not in the reader's alphabet become its wildcard.
++ * Return: Was the sequence read completely?
++ ****************************************************************************/
++BOOLEAN_T read_raw_sequence_from_reader(
++   DATA_BLOCK_READER_T *reader, // Sequence source
++   unsigned int max_chars, // Maximum chars in raw_sequence.
++   char* raw_sequence // Pre-allocated buffer; needs max_chars + 1 chars.
++) {
++
++  SEQ_READER_FROM_FASTA_T *fasta_reader
++    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);
++
++  // Read sequence into temp. buffer from the sequence file.
++  char buffer[max_chars];
++  long start_file_pos = ftell(fasta_reader->fasta_file);
++  size_t seq_index = 0;
++  size_t total_read = 0;
++  while (seq_index < max_chars) {
++    long chunk_file_pos = start_file_pos + (long) total_read; // offset of buffer[0]
++    size_t num_char_read = fread(
++      buffer,
++      sizeof(char),
++      max_chars - seq_index,
++      fasta_reader->fasta_file
++    );
++    fasta_reader->current_position += num_char_read;
++    total_read += num_char_read;
++    // NOTE(review): ferror() is a flag, not an errno; strerror() text may mislead.
++    if (feof(fasta_reader->fasta_file)) {
++       fasta_reader->at_end_of_file = TRUE;
++    }
++    else if (num_char_read < (max_chars - seq_index)) {
++      die(
++        "Error while reading sequence from file:%s.\nError message: %s\n",
++        fasta_reader->filename,
++        strerror(ferror(fasta_reader->fasta_file))
++      );
++    }
++
++    size_t i;
++    for(i = 0; i < num_char_read; ++i) {
++      char c = buffer[i];
++      assert(c != 0);
++      if (isspace((unsigned char) c)) {
++        // Skip over white space
++        fasta_reader->at_start_of_line = (c == '\n');
++      }
++      else if (c == '>' && fasta_reader->at_start_of_line == TRUE) {
++        // We found the start of a new sequence while trying to fill the
++        // buffer. Leave the buffer incomplete and wind the file back to the
++        // character preceding the '>', relative to this pass of the loop.
++        fseek(fasta_reader->fasta_file, chunk_file_pos + (long) i - 1, SEEK_SET);
++        fasta_reader->current_position = chunk_file_pos + (long) i - 1;
++        fasta_reader->at_end_of_seq = TRUE;
++        fasta_reader->at_start_of_line = FALSE;
++        fasta_reader->at_end_of_file = FALSE;
++        break;
++      }
++      else {
++        fasta_reader->at_start_of_line = FALSE;
++        // Check that character is legal in alphabet.
++        // If not, replace with wild card character.
++        if (alph_is_known(fasta_reader->alphabet, c)) {
++          raw_sequence[seq_index] = c;
++        }
++        else {
++          raw_sequence[seq_index] = alph_wildcard(fasta_reader->alphabet);
++          fprintf(
++            stderr,
++            "Warning: %c is not a valid character in %s alphabet.\n"
++            "         Converting %c to %c.\n",
++            c,
++            alph_name(fasta_reader->alphabet),
++            c,
++            raw_sequence[seq_index]
++          );
++        }
++        ++seq_index;
++      }
++    }
++    if (fasta_reader->at_end_of_seq || fasta_reader->at_end_of_file) {
++      break;
++    }
++  }
++
++  raw_sequence[seq_index] = '\0';
++  return(fasta_reader->at_end_of_seq || fasta_reader->at_end_of_file);
++}
++
+ /******************************************************************************
+- * Fills in the next data block for the sequence. 
+- * During the first call for the sequence it fills in the full data block.
+- * On successive calls, shifts the sequence in the block down one position
+- * and reads one more character.
++ * Populates the data block with the next block of sequence.
++ *
++ * During the first call for the sequence it fills in a buffer from a file.
++ * The sequence pointer in the data block is set to point at the start of the buffer.
++ * On successive calls, the sequence pointer in the block is shifted down one position
++ * in the buffer. When the end of the buffer is reached, it is filled again from the file.
+  * 
+  * Returns TRUE if it was able to completely fill the block, FALSE if 
+  * the next sequence or EOF was reached before the block was filled.
+--- a/src/seq-reader-from-fasta.h
++++ b/src/seq-reader-from-fasta.h
+@@ -37,5 +37,30 @@
+   int * end_ptr           // end position of sequence (chr:\d+-(\d+))
+ );
+ 
++/****************************************************************************
++ * Read raw sequence until a new sequence is encountered or too many letters
++ * are read. The letters read are stored in raw_sequence as a C string.
++ *
++ * Return: Was the sequence read completely?
++ ****************************************************************************/
++BOOLEAN_T read_raw_sequence_from_reader(
++   DATA_BLOCK_READER_T *fasta_reader, // Sequence source
++   unsigned int max_chars, // Maximum chars in raw_sequence.
++   char* raw_sequence // Pre-allocated buffer; needs max_chars + 1 chars.
++);
++
++/****************************************************************************
++ * Read up to (max_size - offset) letters of one sequence from a
++ * DATA_BLOCK_READER_T and store them in the raw sequence buffer of the
++ * SEQ_T object, starting at the given buffer offset.
++ ****************************************************************************/
++void read_one_fasta_segment_from_reader(
++   DATA_BLOCK_READER_T *reader, // Sequence source
++   size_t max_size, // Capacity of the raw sequence buffer, in letters
++   size_t offset, // Index in the buffer at which writing starts
++   SEQ_T *sequence // Sequence object that receives the letters
++);
++
++
+ size_t get_current_pos_from_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader);
+ #endif