summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'sci-biology/meme/files/meme-4.11.2_p2-patch1.patch')
-rw-r--r--sci-biology/meme/files/meme-4.11.2_p2-patch1.patch383
1 files changed, 383 insertions, 0 deletions
diff --git a/sci-biology/meme/files/meme-4.11.2_p2-patch1.patch b/sci-biology/meme/files/meme-4.11.2_p2-patch1.patch
new file mode 100644
index 000000000000..30a182f8f43a
--- /dev/null
+++ b/sci-biology/meme/files/meme-4.11.2_p2-patch1.patch
@@ -0,0 +1,383 @@
+--- a/doc/alphabet-format.html
++++ b/doc/alphabet-format.html
+@@ -233,7 +233,7 @@
+ providing a reference on the meaning of the symbols used. If present, the
+ symbol name must be the second field.</p>
+ <p>The &quot;<span class="pdat">name</span>&quot; follows the rules of
+- <a href="qstr">quoted text</a>.</p>
++ <a href="#qstr">quoted text</a>.</p>
+ </div>
+ <h5>color</h5>
+ <div class="indent">
+--- a/doc/release-notes.html
++++ b/doc/release-notes.html
+@@ -14,8 +14,26 @@
+ <h2>Motif-based sequence analysis tools</h2>
+ </div>
+ <h2>MEME Suite Release Notes</h2>
++ <hr>
++ <b>MEME version 4.11.2 patch 1 -- June 16, 2016</b>
++ <ul>
++ <li>
++ <b>Bug fixes</b>
++ <ul>
++ <li>
++ Fixed bug in MCAST 4.11.2 that caused it to prematurely truncate
++ reading the sequence file.
++ </li>
++ <li>
++ Modified MEME to fall back to a simple Dirichlet prior when
++ using DNA or a custom alphabet with a prior that requires
++ a prior library, but no prior libray is specified.
++ </li>
++ </ul
++ </li>
++ </ul>
++ <p>
+ <hr>
+- <p>
+ <b>MEME version 4.11.2 -- May 5 2016</b>
+ </p>
+ <ul>
+--- a/src/fasta-io.c
++++ b/src/fasta-io.c
+@@ -14,6 +14,7 @@
+ #include "alphabet.h"
+ #include "fasta-io.h"
+ #include "io.h"
++#include "seq-reader-from-fasta.h"
+ #include "prior-reader-from-psp.h"
+ #include "seq.h"
+
+@@ -159,61 +160,6 @@
+ }
+
+ /****************************************************************************
+- * Read raw sequence until a new sequence is encountered or too many letters
+- * are read. The new sequence is appended to the end of the given
+- * sequence.
+- *
+- * Return: Was the sequence read completely?
+- ****************************************************************************/
+-static BOOLEAN_T read_raw_sequence_from_reader(
+- DATA_BLOCK_READER_T *fasta_reader, // Sequence source
+- char* name, // Sequence ID (used in error messages).
+- ALPH_T* alph, // Alphabet in use
+- unsigned int offset, // Current position in raw_sequence.
+- unsigned int max_chars, // Maximum chars in raw_sequence.
+- char* raw_sequence // Pre-allocated sequence.
+-) {
+- // tlb; change a_char to integer so it will compile on SGI
+- int a_char;
+- int start_update;
+- BOOLEAN_T return_value = TRUE;
+-
+- // Start at the end of the given sequence.
+- assert(offset < max_chars);
+-
+- DATA_BLOCK_T *seq_block = new_sequence_block(max_chars - offset);
+- return_value = !fasta_reader->get_next_block(fasta_reader, seq_block);
+-
+- char *seq_buffer = get_sequence_from_data_block(seq_block);
+- size_t seq_buffer_size = get_num_read_into_data_block(seq_block);
+- int i;
+- for (i = 0; i < seq_buffer_size; ++i) {
+- a_char = seq_buffer[i];
+- // Skip non-alphabetic characters.
+- if (!isalnum(a_char) && a_char != '-' && a_char != '*' && a_char != '.') {
+- if ((a_char != ' ') && (a_char != '\t') && (a_char != '\n') && (a_char != '\r')) {
+- fprintf(stderr, "Warning: Skipping character %c in sequence %s.\n",
+- a_char, name);
+- }
+- } else {
+- // skip check if unknown alph
+- if (alph != NULL && !alph_is_known(alph, a_char)) {
+- fprintf(stderr, "Warning: Converting illegal character %c to %c ",
+- a_char, alph_wildcard(alph));
+- fprintf(stderr, "in sequence %s.\n", name);
+- a_char = alph_wildcard(alph);
+- }
+- raw_sequence[offset] = (char) a_char;
+- ++offset;
+- }
+- }
+-
+- raw_sequence[offset] = '\0';
+- free_data_block(seq_block);
+- return(return_value);
+-}
+-
+-/****************************************************************************
+ * Read one sequence from a file in Fasta format.
+ *
+ * Return: Was a sequence successfully read?
+@@ -320,44 +266,6 @@
+ }
+
+ /****************************************************************************
+- * Read up to max_chars letters of one sequence from a DATA_BLOCK_T readder
+- * and copy them in to the raw sequence in the SEQ_T object starting at the
+- * given buffer offset.
+- ****************************************************************************/
+-void read_one_fasta_segment_from_reader(
+- DATA_BLOCK_READER_T *fasta_reader,
+- size_t max_size,
+- size_t buffer_offset,
+- SEQ_T *sequence
+-) {
+-
+- assert(sequence != NULL);
+- assert(get_seq_length(sequence) <= max_size);
+-
+- // Get the raw sequence buffer from the SEQ_T
+- char *raw_sequence = get_raw_sequence(sequence);
+- if (raw_sequence == NULL) {
+- // Allocate space for raw sequence if not done yet.
+- raw_sequence = mm_malloc(sizeof(char) * max_size + 1);
+- raw_sequence[0] = 0;
+- }
+-
+- // Read a block of sequence charaters into the
+- // raw sequence buffer for the SEQ_T.
+- char *name = get_seq_name(sequence);
+- BOOLEAN_T is_complete = read_raw_sequence_from_reader(
+- fasta_reader,
+- name,
+- NULL, //FIXME this is dodgy, need a proper way of getting the alphabet. The fasta_reader has it but it is not accessable!
+- buffer_offset,
+- max_size,
+- raw_sequence
+- );
+- set_raw_sequence(raw_sequence, is_complete, sequence);
+-
+-}
+-
+-/****************************************************************************
+ * Read all the sequences from a FASTA file at once.
+ Multiple files can be appended by calling this more than once.
+ ****************************************************************************/
+--- a/src/fasta-io.h
++++ b/src/fasta-io.h
+@@ -43,19 +43,6 @@
+ );
+
+ /****************************************************************************
+- * Read up to max_chars letters of one sequence from a DATA_BLOCK_T readder
+- * and copy them in to the raw sequence in the SEQ_T object starting at the
+- * given buffer offset.
+- ****************************************************************************/
+-void read_one_fasta_segment_from_reader(
+- DATA_BLOCK_READER_T *fasta_reader,
+- size_t max_size,
+- size_t buffer_offset,
+- SEQ_T* sequence
+-);
+-
+-
+-/****************************************************************************
+ * Read all the sequences from a file in Fasta format.
+ ****************************************************************************/
+ void read_many_fastas
+--- a/src/init.c
++++ b/src/init.c
+@@ -767,10 +767,16 @@
+ if (alph_is_builtin_protein(alph)) { // default mixture prior for proteins
+ plib_name = make_path_to_file(get_meme_etc_dir(), PROTEIN_PLIB);
+ } else {
+- fprintf(stderr, "The prior library must be specified for DNA or custom "
+- "alphabets when specifiying a prior type of 'dmix', 'mega' "
+- "or 'megap'.");
+- exit(1);
++ fprintf(
++ stderr,
++ "WARNING: When using DNA or a custom alphabet, "
++ "and specifiying a prior type of\n"
++ "'dmix', 'mega' or 'megap', a prior library must be provided.\n"
++ "No prior library was provided, so a simple Dirichlet prior will be used.\n"
++ );
++ prior = "dirichlet";
++ ptype = Dirichlet;
++ if (beta <= 0) beta = 0.01; // default b = 0.01 for simple Dirichlet
+ }
+ }
+ }
+--- a/src/seq-reader-from-fasta.c
++++ b/src/seq-reader-from-fasta.c
+@@ -639,11 +639,140 @@
+ return fasta_reader->current_position;
+ }
+
++
++/****************************************************************************
++ * Read up to max_chars letters of one sequence from a DATA_BLOCK_T readder
++ * and copy them in to the raw sequence in the SEQ_T object starting at the
++ * given buffer offset.
++ ****************************************************************************/
++void read_one_fasta_segment_from_reader(
++ DATA_BLOCK_READER_T *fasta_reader,
++ size_t max_size,
++ size_t offset,
++ SEQ_T *sequence
++) {
++
++
++ assert(sequence != NULL);
++ assert(offset < max_size);
++
++ // Get the raw sequence buffer from the SEQ_T
++ char *raw_sequence = get_raw_sequence(sequence);
++ if (raw_sequence == NULL) {
++ // Allocate space for raw sequence if not done yet.
++ raw_sequence = mm_malloc(sizeof(char) * max_size + 1);
++ raw_sequence[0] = 0;
++ }
++
++ // Read a block of sequence charaters into the
++ // raw sequence buffer for the SEQ_T, starting at offset.
++ BOOLEAN_T is_complete = read_raw_sequence_from_reader(
++ fasta_reader,
++ max_size - offset,
++ raw_sequence + offset
++ );
++ set_raw_sequence(raw_sequence, is_complete, sequence);
++}
++
++/****************************************************************************
++ * Read raw sequence until a new sequence is encountered or too many letters
++ * are read.
++ *
++ * Return: Was the sequence read completely?
++ ****************************************************************************/
++BOOLEAN_T read_raw_sequence_from_reader(
++ DATA_BLOCK_READER_T *reader, // Sequence source
++ unsigned int max_chars, // Maximum chars in raw_sequence.
++ char* raw_sequence // Pre-allocated sequence buffer.
++) {
++
++ SEQ_READER_FROM_FASTA_T *fasta_reader
++ = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);
++
++ // Read sequence into temp. buffer from the sequence file.
++ char buffer[max_chars];
++ long start_file_pos = ftell(fasta_reader->fasta_file);
++ size_t seq_index = 0;
++ size_t total_read = 0;
++ while (seq_index < max_chars) {
++
++ size_t num_char_read = fread(
++ buffer,
++ sizeof(char),
++ max_chars - seq_index,
++ fasta_reader->fasta_file
++ );
++ fasta_reader->current_position += num_char_read;
++ total_read += num_char_read;
++
++ if (feof(fasta_reader->fasta_file)) {
++ fasta_reader->at_end_of_file = TRUE;
++ }
++ else if (num_char_read < (max_chars - seq_index)) {
++ die(
++ "Error while reading sequence from file:%s.\nError message: %s\n",
++ fasta_reader->filename,
++ strerror(ferror(fasta_reader->fasta_file))
++ );
++ }
++
++ size_t i;
++ for(i = 0; i < num_char_read; ++i) {
++ char c = buffer[i];
++ assert(c != 0);
++ if (isspace(c)) {
++ // Skip over white space
++ fasta_reader->at_start_of_line = (c == '\n');
++ }
++ else if (c == '>' && fasta_reader->at_start_of_line == TRUE) {
++ // We found the start of a new sequence while trying
++ // to fill the buffer. Leave the buffer incomplete.
++ // and wind back the file
++ fseek(fasta_reader->fasta_file, start_file_pos + i - 1, SEEK_SET);
++ fasta_reader->current_position = start_file_pos + i - 1;
++ fasta_reader->at_end_of_seq = TRUE;
++ fasta_reader->at_start_of_line = FALSE;
++ fasta_reader->at_end_of_file = FALSE;
++ break;
++ }
++ else {
++ fasta_reader->at_start_of_line = FALSE;
++ // Check that character is legal in alphabet.
++ // If not, replace with wild card character.
++ if (alph_is_known(fasta_reader->alphabet, c)) {
++ raw_sequence[seq_index] = c;
++ }
++ else {
++ raw_sequence[seq_index] = alph_wildcard(fasta_reader->alphabet);
++ fprintf(
++ stderr,
++ "Warning: %c is not a valid character in %s alphabet.\n"
++ " Converting %c to %c.\n",
++ c,
++ alph_name(fasta_reader->alphabet),
++ c,
++ raw_sequence[i]
++ );
++ }
++ ++seq_index;
++ }
++ }
++ if (fasta_reader->at_end_of_seq | fasta_reader->at_end_of_file) {
++ break;
++ }
++ }
++
++ raw_sequence[seq_index] = '\0';
++ return(fasta_reader->at_end_of_seq | fasta_reader->at_end_of_file);
++}
++
+ /******************************************************************************
+- * Fills in the next data block for the sequence.
+- * During the first call for the sequence it fills in the full data block.
+- * On successive calls, shifts the sequence in the block down one position
+- * and reads one more character.
++ * Populates the data block for the with the next block of sequence.
++ *
++ * During the first call for the sequence it fills in a buffer from a file,
++ * The sequence pointer in the data block is set to point at the start of the buffer.
++ * On successive calls, the sequence pointer in the block is shifted down one position
++ * in the buffer. When the end of the buffer is reached, it is filled again from the file.
+ *
+ * Returns TRUE if it was able to completely fill the block, FALSE if
+ * the next sequence or EOF was reached before the block was filled.
+--- a/src/seq-reader-from-fasta.h
++++ b/src/seq-reader-from-fasta.h
+@@ -37,5 +37,30 @@
+ int * end_ptr // end position of sequence (chr:\d+-(\d+))
+ );
+
++/****************************************************************************
++ * Read raw sequence until a new sequence is encountered or too many letters
++ * are read.
++ *
++ * Return: Was the sequence read completely?
++ ****************************************************************************/
++BOOLEAN_T read_raw_sequence_from_reader(
++ DATA_BLOCK_READER_T *fasta_reader, // Sequence source
++ unsigned int max_chars, // Maximum chars in raw_sequence.
++ char* raw_sequence // Pre-allocated sequence.
++);
++
++/****************************************************************************
++ * Read up to max_chars letters of one sequence from a DATA_BLOCK_T readder
++ * and copy them in to the raw sequence in the SEQ_T object starting at the
++ * given buffer offset.
++ ****************************************************************************/
++void read_one_fasta_segment_from_reader(
++ DATA_BLOCK_READER_T *reader,
++ size_t max_size,
++ size_t offset,
++ SEQ_T *sequence
++);
++
++
+ size_t get_current_pos_from_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader);
+ #endif