diff options
-rw-r--r-- | sci-biology/goby-cpp/Manifest | 7 | ||||
-rw-r--r-- | sci-biology/goby-cpp/files/Alignments.proto | 597 | ||||
-rw-r--r-- | sci-biology/goby-cpp/files/Reads.proto | 96 | ||||
-rw-r--r-- | sci-biology/goby-cpp/files/goby-cpp-2.0.1-underlinking.patch | 16 | ||||
-rw-r--r-- | sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild | 9 |
5 files changed, 9 insertions, 716 deletions
diff --git a/sci-biology/goby-cpp/Manifest b/sci-biology/goby-cpp/Manifest index 88622e3c8136..2bd240a998af 100644 --- a/sci-biology/goby-cpp/Manifest +++ b/sci-biology/goby-cpp/Manifest @@ -1,3 +1,4 @@ -DIST goby_1.9.7.3-cpp.zip 127215 SHA256 8493daa7c850732c6c48d4512bd26b7eec411a729b39d9861a4a6aae08faa674 SHA512 56bf190224b6f22e0578cea4cc950e52e746655c75ffc13675276787b4d0ced682f891f6ecf7af3cf124b535ac3afc8711b0ecff44d6fd25fe521de7371c3486 WHIRLPOOL ae7ead1b0364383b46d4ef8b59453146c68384b379c26498fc9b24d014ba096a99723bad42cfeb84d44c20e4fc14882bbad303ab8c981889f90dff88a882c5c0 -DIST goby_1.9.8.1-cpp.zip 134904 SHA256 2f1bd87f2870af178f34a8e7c11819aa9e42f35e20f1985d2ceb054f452e2a97 SHA512 d31cd7f0be19074bfe8da74d9f2510f0e0f15fe6c485bbed8520052468d2cd2f1bc5fcad8b0d6a1586f5acde73db326059f45994ecfbb5fb6c09692d8e155190 WHIRLPOOL 6ce51c46f8802d31068f510f6da13b2920086eafdae24506830b42d79e48eb6ed9cac48a96090a81964daebf4a0c8f21c490ca3b0af2f589ac57647bde1be79f -DIST goby_2.0.1-cpp.zip 177718 SHA256 5ec57b833cb1a0f53e975112d1c360b14a9b17cfff3fb0ad77dd70672c1881db SHA512 992bd10d5538dec1478820f26151dd311f4de13e7947b49f0b06d6cbdd4b71deeb3aa8a4c6a598fb92fbcb9cbf4ff97bf81205c9389d4a0da4443317e48aea9f WHIRLPOOL ab94cf674703917b6f0cde812d0fbcd94e18fb6055b30d6a1eefa1e4cb5b76bbe18c67388c66e25e87e522df9a9946b0eae5a164428abe874a382f5bc39a13d0 +DIST goby-cpp-2.0.1-files.tar.bz2 8354 BLAKE2B 0169e1bbcdc27f359cde47df708546dd6af0a68334295b247a6aac9122b7e9b1ee590fe0b57052c642b7e25478f5b118c70bec0c4b4af3694ab0f68c1c9ea73a SHA512 6f0cf466688cdbe9fe646cdff78dd0721fd0b0819c354c63e7c39c45895c319754cdadf23aeb9d544b0b2c68f1168583cb541ec160ba7f567fa0218dbad38e1e +DIST goby_1.9.7.3-cpp.zip 127215 BLAKE2B 0673c36b503a6daee5fdaaf96fb415277502c0a49e530eb39983d4718f4a1d8eb9a6ff0a3202413c358600aafc2bf73482be12462f798923c13e19a6bcd590b1 SHA512 56bf190224b6f22e0578cea4cc950e52e746655c75ffc13675276787b4d0ced682f891f6ecf7af3cf124b535ac3afc8711b0ecff44d6fd25fe521de7371c3486 +DIST goby_1.9.8.1-cpp.zip 134904 BLAKE2B 800f3bcbe9f721bfb636f514630fb1ceba3a1fe41616f63fc15f9f2a24394ef9be90419ccad0c9bd8b29100eeaea57659ba013042cf4a11b6038fc6dee782619 SHA512 d31cd7f0be19074bfe8da74d9f2510f0e0f15fe6c485bbed8520052468d2cd2f1bc5fcad8b0d6a1586f5acde73db326059f45994ecfbb5fb6c09692d8e155190 +DIST goby_2.0.1-cpp.zip 177718 BLAKE2B 666b50fdc199693f8a4f9b6007f6609e91ab6093b643da88e580c9a3438a150cd7be78d2b5dcdd2fe905263d32ebbac1e0e47dbc637fd5d59f877e7cbdaaeeb2 SHA512 992bd10d5538dec1478820f26151dd311f4de13e7947b49f0b06d6cbdd4b71deeb3aa8a4c6a598fb92fbcb9cbf4ff97bf81205c9389d4a0da4443317e48aea9f diff --git a/sci-biology/goby-cpp/files/Alignments.proto b/sci-biology/goby-cpp/files/Alignments.proto deleted file mode 100644 index fe7f56647644..000000000000 --- a/sci-biology/goby-cpp/files/Alignments.proto +++ /dev/null @@ -1,597 +0,0 @@ -package goby; - -option java_package = "edu.cornell.med.icb.goby.alignments"; - -option optimize_for = SPEED; - -/* - This message is written to 'basename'.entries as a very large chunked collection. -*/ -message AlignmentCollection { - repeated AlignmentEntry alignment_entries = 1; -} - - -message AlignmentEntry { - /* Multiplicity of this entry. The number of times this alignment entry would be repeated exactly the same if - query redundancy had not been removed by read factorization. - */ - optional uint32 multiplicity = 7; - - /* - Compressed stream of data. Removed since Goby 2.0 supports chunk codecs. Do not reuse field index 23 - optional bytes compressed_data = 23; - */ - - /* An integer that uniquely identifies the query (a short read) in a set of alignment runs. When several - alignment runs are made with the same set of query sequences, equality of query index means that the query - sequences were the same. (Comparing integers for equality is much faster than comparing strings.) - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 query_index = 1; - /* An integer that uniquely identifies the target (e.g., a chromosome) in a set of alignment runs. When several - alignment runs are made with the same set of target sequences, equality of target index means that the target - sequence was the same across the runs. (Comparing integers for equality is much faster than comparing strings.) - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 target_index = 2; - /* - The position on the target of the start of the alignment between the query and the target. - In the following example, position is 3 because the third base of the query 'C' was aligned with - position 3 of the reference (two read bases were soft clipped: "ct"). This example shows that the - alignment can start at a mismatch if it was so constructed by the aligner. - - 0123456789 - AAAAGTCAAA target - ctCGTC query - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 position = 3; - - /* - True when the query matches the target on the reverse strand - */ - optional bool matching_reverse_strand = 6; - - /* - The position on the query where the alignment starts. This value is different from zero - when some bases/residues of the query could not be aligned with the target. - TODO: Rename this to left_trim. Add a right_trim property. - */ - optional uint32 query_position = 5; - - /* - The score of the alignment, where larger scores indicate better matches between the query and the target. - If an aligner outputs only the number of mismatches between query and target, the score is taken to be - -(#mismatches(query,target)). - */ - optional float score = 4; - - /* - Number of bases/residues that differ in the alignment between query and target sequences. - */ - optional uint32 number_of_mismatches = 8; - - /* - Cumulative number of insertions and/or deletions present in the alignment. - */ - optional uint32 number_of_indels = 9; - - /* - Number of bases that have been aligned for the query. Please note that query_aligned_length must be - less or equal to query_length. - */ - optional uint32 query_aligned_length = 11; - - /* - Number of bases that have been aligned for the target. - */ - optional uint32 target_aligned_length = 12; - - repeated SequenceVariation sequence_variations = 13; - - /* - Length of the query sequence. - */ - optional uint32 query_length = 10; - /* - Mapping Quality (phred-scaled posterior probability that the mapping - position of this read is incorrect). Please note that different aligners - may estimate mapping quality with different approaches, resulting in aligner - specific differences in the distribution of mapping quality. It is recommended - to condition mapping quality on the aligner that produced the specific alignment - being processed. See aligner name and version in the header. - Note that the following description is preliminary. A clear specification is - needed: - The mapping quality should be proportional to the - log of the probability that the given mapping is the "correct" one. - So if there are five equally good mappings of a read to the genome, - the probability of each would be 0.2, and the mapping quality would be - something like -10*log10(1-0.2) = 1. If a mapping is highly likely, - say a 1e-4 of it being wrong, then the mapping quality would be - -10*log10(1e-4) = 40. - */ - optional int32 mapping_quality = 14; - - /* - If this read was aligned with a pair, the flags for the pair alignment (based on SAM): - 000000001 paired - 000000010 properly paired - 000000100 read unmapped - 000001000 mate unmapped - 000010000 read reverse strand - 000100000 mate reverse strand - 001000000 first in pair - 010000000 second in pair - 100000000 not primary alignment - */ - optional uint32 pair_flags = 15; - - /* - If there is an alignment entry for the paired read (the paired read was mapped), a link to the entry is given. - */ - optional RelatedAlignmentEntry pair_alignment_link = 16; - - /* Index of the read fragment from which this alignment was obtained. */ - optional uint32 fragment_index = 17; - - /* If a read spans exon-exon junctions some aligners (e.g., GSNAP) will output two or more - alignment entries, one for each matching part of the read, and link these entries with - spliced_alignment_links. The field spliced_forward_alignment_link points to the next - AlignmentEntry in the chain of spliced alignments. - */ - optional RelatedAlignmentEntry spliced_forward_alignment_link = 18; - - /* If a read spans exon-exon junctions some aligners (e.g., GSNAP) will output two or more - alignment entries, one for each matching part of the read, and link these entries with - spliced_alignment_links. The field spliced_backward_alignment_link points to the previous - AlignmentEntry in the chain of spliced alignments. - */ - optional RelatedAlignmentEntry spliced_backward_alignment_link = 22; - - /* - If a read spans exon-exon junctions some aligners (e.g., GSNAP) will output two alignment entries, one for each - matching part of the read, and flag describes the spliced_alignment_link with these - binary flags: - 000000001 normal - 000000010 novel - */ - optional uint32 spliced_flags = 19; - - /* The size of the insert used when making the sequence library. This is the total size of the DNA - fragment to sequence, without the adapters. This is not the length of sequence that separates the reads. - See http://seqanswers.com/forums/showthread.php?t=8730 for details. Insert size is inferred for each pair - of reads by the aligner and is recorded here if was estimated (i.e., for paired-end reads). - */ - optional sint32 insert_size = 20; - - /* - The sample index. Uniquely identifies the aligned sample this read was read from. Storing the sample index in the - alignment entry makes it possible to concat alignments from different origins and track what sample originally - contained each entry. - */ - optional uint32 sample_index = 21; - /* - The total number of times the query index associated with this entry occurs across the entire alignment file. - - This field is used to purge queryIndex->smallIndex associations after all instances of a queryindex have - been seen (see QueryIndexPermutation class). When each entry has a value for this field, the header field - query_index_occurrences is true. - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 query_index_occurrences = 25; - /* - The total number of times the read matches the reference across the entire alignment file. This differs from - query_index_occurrences because reads that are matching through splice and pair links count as one for ambiguity. - The field can be used to filter by ambiguity-threshold on the fly after an alignment has been done (to restrict - entries to more smaller thresholds). When each entry has a value for this field, the header field - ambiguity_stored_in_entries is true. - - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 ambiguity = 27; - /* - List of BAM attributes, if the alignment was imported from BAM. The attributes are stored in exactly the format - allowed for BAM. For instance, X0:i:9 X1:i:1 MD:Z:68 RG:Z:SRR084825 will be stored as four strings: - "X0:i:9", "X1:i:1", "MD:Z:68", "RG:Z:SRR084825". Note that sam-to-compact will interpret some BAM attributes - and populate goby native fields. Such tags do not appear in bam_attributes, and are instead re-generated from - the corresponding goby native fields. - Since Goby 2.0. - */ - repeated string bam_attributes = 50; - /* - Quality scores for all bases of the read. - Since Goby 2.0. - */ - optional bytes read_quality_scores = 55; - - /* - Origin index. An integer that references a ReadOriginInfo message in the alignment header and - makes it possible to track the origin of the read (especially useful after several alignments - have been merged/concatenated). - (Since Goby 2.0). - */ - optional uint32 read_origin_index = 26; - /* - Bases that an aligner considered do not belong to the alignment of the read to the reference. Potentially - erroneous bases, or bases that belong to a different part of the reference genome. Left clipped bases are - stored in this field as character bases, or as an equal sign character '=' when the clipped base did match - the reference base. For instance "A=G" for three soft-clipped bases, the middle one matching the genome at - this position. The number of bases in softClippedBasesLeft is exactly equal to queryPosition. - */ - optional string softClippedBasesLeft = 30; - /* - Bases that an aligner considered do not belong to the alignment of the read to the reference. Potentially - erroneous bases, or bases that belong to a different part of the reference genome. Right clipped bases are - stored in this field as character bases, or as an equal sign character '=' when the clipped base did match - the reference base. The number of bases in softClippedBasesRight is exactly equal - to queryLength - queryAlignedLength - queryPosition. - */ - optional string softClippedBasesRight = 31; - - /* - Quality scores for bases in softClippedBasesLeft. Stored in Phred Units. - */ - optional bytes softClippedQualityLeft = 32; - /* - Quality scores for bases in softClippedBasesRight. Stored in Phred Units. - */ - optional bytes softClippedQualityRight = 33; - /* - Sequence for a read placed near this entry, but unmapped to the reference sequence. For instance, used to record - the sequence of a mate that did not map to the reference. We know that the mate maps in the proximity of this entry - (it is placed) but are unable to map it to a specific genomic position. The sequence is always given as obtained - from the reads file. - */ - optional string placedUnmappedSequence=40; - /* - Quality scores for a read placed near this entry. Phred units. - */ - optional bytes placedUnmappedQuality=41; - - /* - Read name. In SAM/BAM this is referred to as QNAME. Paired and segmented reads will have the same Read name. - */ - optional string readName=42; -} - -/* A link to another alignment entry. This message type is used to represent relations - between alignments, such as the relation between the two read fragments in a paired-end protocol, - or the relation between parts of reads that align through an exon exon junction and map in - different locations of the genome. - */ -message RelatedAlignmentEntry { - /* Target index of the location where the other alignment entry is mapped. - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 target_index = 1; - - /* Position on the reference where the other alignment entry is mapped. * - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 position = 2; - - /* Index of the fragment for the related alignment entry. This index - makes it possible to identify which of the read fragments mapped to the given - location is related to the source alignment entry. - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 fragment_index = 3; - - optional uint32 optimized_index=50; -} - -/* - Represents sequence variations between the query and the reference sequences. Many variations can be represented. - For instance, an insertion at position 5 in the reference would be represented as from="A", to="" position=5. - A mutation T->G at position 6 would be rendered as from="T", to="G" position=6. Padded alignments (see SAM description) - can be described by a combination of pair-wise alignments, where the gap character '-' is used to indicate that no - base exists in the sequence considered for the alignment position, for instance: - - - Padding example: - - 123 (<-positions) -ref A-C - A-T [from="-" to="" position=2] [from="C" to="T" position=3] - ACT [from="" to="C" position=2] [from="C" to="T" position=3] - A-T [from="-" to="" position=2] [from="C" to="T" position=3] - - - Mutation example: - 123 (<-positions) -ref ATT - ACT [from="T" to="C" position=2] - - -- Example of deletion in a read: - 123 (<-positions) -ref ATT - A-T [from="T" to="-" position=2] - - -- Example of insertion of two base pairs in a read: - 12345 (<-positions) -ref A--TT - ACCTT [from="" to="CC" position=2] - - */ -message SequenceVariation { - /* The reference bases. Can include one or more gap characters '-', to indicate that the reference sequence has - no base at this alignment position. - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional string from = 2; - /* The read bases that differ from the reference sequence. Can include one or more gap characters '-', to indicate - that the query sequence has no base at this alignment position. - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional string to = 1; - /* - The position of the variation on the read, as if the read always matched on the forward strand. - Adding position to the index where the reference starts aligning the read yields the position of the variation - in reference/target sequence space. Since position starts at one the resulting position will also be one based. - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 position = 3; - /* - The position of the variation, starting from the beginning of the aligned read (position 1), and up to the length - of the read (inclusive). Use this index if you need to know how far the variation is observed from the beginning - of the sequenced read. When the read has an insertion, this index records the position immediately before the base - where the bases are inserted (these bases are in the to field). - When the read has a deletion, read_index records the position in the read after which the bases that would align - in the reference are missing (these bases are in the from field). - This field is required (enforced by semantic validation in Goby 2.0+). - */ - optional uint32 read_index = 5; - - /** - The read base quality scores for those bases that are given in the to field. This field - is populated when the reads used to perform the search include quality scores, and when - the alignment parser can extract the information from the aligner's output. - (this option is currently not implemented in Goby.) - */ - optional bytes to_quality = 4; - -} -/* - This message is written to 'basename'.header -*/ - -message AlignmentHeader { - /* - The smallest possible query index in this alignment. Data stored as an array where - queryIndex is the array index will be stored with only the elements in the inclusive - range [smallestSplitQueryIndex largestSplitQueryIndex] - Such data structures include queryLength and some arrays in the TooManyHits data - structure. - */ - optional uint32 smallest_split_query_index = 9; - /* - The largest possible query index in this alignment. Data stored as an array where - queryIndex is the array index will be stored with only the elements in the inclusive - range [smallestSplitQueryIndex largestSplitQueryIndex] - Such data structures include queryLength and some arrays in the TooManyHits data - structure. - */ - optional uint32 largest_split_query_index = 11; - - /* Mapping from query identifier name to query index (as used in alignment entries). - */ - optional IdentifierMapping query_name_mapping = 1; - - /* Mapping from target identifier name to target index (as used in alignment entries). - */ - optional IdentifierMapping target_name_mapping = 2; - - /* - The number of query sequences - */ - optional uint32 number_of_queries = 5; - /* - The number of target sequences - */ - optional uint32 number_of_targets = 6; - /* - The number of reads that were aligned to the reference and are represented in this alignment archive. - */ - optional uint32 number_of_aligned_reads = 7; - - /* - Length of the query sequences. One number per query, in the order of increasing query index. - This information has been moved to the individual alignment entries. - */ - repeated uint32 query_length = 3 [deprecated = true]; - /* - If query length is constant across all the queries, this field contains the constant length. - In such cases, query_length will be empty. - */ - optional uint32 constant_query_length = 10; - - /* - Length of the target sequences. One number per target, in the order of increasing target index. - The target indexes must be 0..(number of targets - 1). - */ - repeated uint32 target_length = 8; - /* - Indicates whether this alignment is sorted by position. True: the alignment entries occur in sorted - order, such that entry a occurs before entry b if a.targetIndex< b.targetIndex or, when entries - have the same target, when a.position < b.position. - */ - optional bool sorted = 13; - - /* - Indicates whether this alignment is indexed by position. When this attribute is true, a file called - 'basename'.index exists that contains the AlignmentIndex message (GZip compressed). - */ - optional bool indexed = 14; - /* - True when query lengths are stored in alignment entries (Goby 1.7+). - */ - optional bool query_lengths_stored_in_entries = 15; - /* - Name of the aligner that produced this alignment. - */ - optional string aligner_name = 17; - /* - Version number for the aligner implementation that produced this alignment. - */ - optional string aligner_version = 18; - /* - The version of Goby that created this alignment file. - */ - optional string version = 25; - - /* - Sample basenames, in the order of increasing sampleIndex, starting with sampleIndex=0. - */ - - repeated string sample_basename = 30; - - /* - This field is true when the query indices of alignment entries were permuted to smaller indices. Only sorted - alignments can have query_indices_were_permuted=true. When the field is true, and you need to retrieve the - original query-index of an alignment (because you want to retrieve the specific read(s) from a read file for - instance), you will need the information in the permutation file (extension basename.perm) and transform back - each small index of interest to the original query index. - */ - optional bool query_indices_were_permuted = 26; - /* - This field is true when entries in the alignment .entries file all have the query_index_occurrences field populated - (Since Goby 2.0). - */ - optional bool query_index_occurrences = 35; - - /* - This field is true when entries in the alignment .entries file all have the ambiguity field populated - (Since Goby 2.0). - */ - optional bool ambiguity_stored_in_entries = 36; - /* - This field is true when entries in the alignment .entries file all have the read_quality_score field populated. - (Since Goby 2.0). - */ - optional bool all_read_quality_scores = 40; - /* - A description of the origin of sets of reads. Serves a similar function to BAM read groups, but more flexible and - efficient. Instead of storing strings, we use integers in the entries. - Alignemnt entries will link to a specific ReadOriginInfo with the origin_index field. - (Since Goby 2.0). - */ - repeated ReadOriginInfo read_origin = 27; -} - -message IdentifierMapping { - repeated IdentifierInfo mappings = 1; -} - -message IdentifierInfo { - required string name = 1; - required uint32 index = 2; -} - - -/* - A description of the origin of sets of reads. Stored in the Goby alignment header and linked - from alignment entries. Goby makes it possible to adapt origin equivalence rules on the fly - efficiently. To do this, it is sufficient to read the header of the alignment, decide which - ReadOriginInfo instances are equivalent (e.g., by looking at sample, platform, library, or - other fields in the message), then construct a function e(a):int. This function takes - one originIndex parameter and returns another integer that maps to an equivalent class. The - equivalence class can be used to estimate error models for entries that belong to each class, - for instance. - (Since Goby 2.0). - */ -message ReadOriginInfo { - /* - Origin index. An integer that links alignment entries to their origin information. - */ - required uint32 origin_index = 1; - /* - Identifier that describes the origin of the reads. This field is compatible with the ID/platform field of BAM read - groups. Free text. - */ - required string origin_id = 2; - /* - The sample from which the reads were sequenced. This field is compatible with the SM/sample field of BAM read - groups. Free text. - */ - optional string sample = 4; - /* - The platform on which the reads were sequenced. This field is compatible with the PL/platform field of BAM read - groups. Valid values: ILLUMINA, SOLID, LS454, HELICOS and PACBIO. - */ - optional string platform = 5; - /* - The library from which the reads were sequenced. This field is compatible with the LB/library field of BAM read - groups. Free text. - */ - optional string library = 8; - /* - The platform unit on which the reads were sequenced. This field for compatibility with samtools. - */ - optional string platform_unit = 12; - /* - The date the reads were sequenced. Useful to identify batch effects, in the format dd:MMM:yyyy. - The month is Jan, Feb, etc. to avoid all confusion with days when day<=12. - */ - optional string run_date = 6; -} - -/* - This message is written to 'basename'.tmh -*/ - -message AlignmentTooManyHits { - /* - The threshold used by the aligner to determine that a query is ambiguous and should be dropped. - Referred to as parameter k below. - */ - required uint32 aligner_threshold = 2; - /* - The hits that are assigned to several (>k) reference location. - */ - repeated AmbiguousLocation hits = 1; - -} - -message AmbiguousLocation { - /* - The index of the query that matched too many times. - */ - required uint32 query_index = 1; - /* - The number of hits that triggered membership in the too many hits list. The query may hit more - locations than reported here, since some alignment tools will just drop queries that match above - a threshold and stop counting. This number can be >=k. - */ - required uint32 at_least_number_of_hits = 2; - /** -The length of the part of the query sequence that could be matched to the target (also called depth). -May be less than the length of the query sequence, in which case the match was not perfect. When merging -alignments produced by searching different reference sequences, consider only at_least_number_of_hits -from alignments that have exactly the longer depth for the query. */ - optional uint32 length_of_match = 3; -} - -/* - This message is written to 'basename'.index - */ -message AlignmentIndex { - /* - Stores one element by target sequence. Each element is the cumulative target length for the target - stored at index i. Assume there are four target sequences, with lengths {10, 12, 15, 34}. The field - targetPositionOffsets will contain: {0,10,22,37}. Such offsets can be used to calculate the absolute - position of a genomic location. Given targetIndex and positionOnReference, the absolute location - is defined as targetPositionOffsets[targetIndex]+positionOnReference. - */ - repeated uint32 target_position_offsets = 1 [packed = true]; - /* - The byte offsets into the compressed entries file. Byte offsets are matched with absolute position - by index. There should be as many elements in offsets as there are in absolutePosition - where chunks start which represent entries whose absolute positions are less than - */ - repeated uint64 offsets = 2 [packed = true]; - /* - The absolute positions of the first entry in the chunk that immediately start at offset. One element - per chunk in the 'basename'.entries file. - */ - repeated uint64 absolute_positions = 3 [packed = true]; - -} diff --git a/sci-biology/goby-cpp/files/Reads.proto b/sci-biology/goby-cpp/files/Reads.proto deleted file mode 100644 index 32c1244a3eb3..000000000000 --- a/sci-biology/goby-cpp/files/Reads.proto +++ /dev/null @@ -1,96 +0,0 @@ -package goby; - -option java_package = "edu.cornell.med.icb.goby.reads"; -option optimize_for = SPEED; - -message ReadCollection { - repeated ReadEntry reads = 1; -} - -message ReadEntry { - /* - Index of a read. - */ - required uint32 read_index = 1; - /* - Index of the barcode, if any. - */ - optional uint32 barcode_index = 10; - /* - Read identifier/name may be present. - */ - optional string read_identifier = 23; - /* - Additional description about the read (from Fasta/Q format). - */ - optional string description = 22; - /* - Length of the sequence. - */ - required uint32 read_length = 2; - /* - Sequence, encoded as ascii characters stored in single bytes. - */ - optional bytes sequence = 3; - /* - The second sequence in a pair. Stored the same way as the sequence attribute. - */ - optional bytes sequence_pair = 5; - /* - Length of the second sequence in a pair. - */ - optional uint32 read_length_pair = 6; - /* - Quality scores in Phred units, stored as single bytes (0-255). - */ - optional bytes quality_scores = 4; - /* - Quality scores for the second sequence in a pair. Stored as the 'qualityScores' attribute. - */ - optional bytes quality_scores_pair = 7; - /* - Compressed stream of data. The first byte indicates the compression/decompression method (codec). The remaining bytes are - content compressed with the codec. - */ - optional bytes compressed_data = 8; - /* - Stores meta-data about the reads. Typically meta-data is stored in the very first read of a - read collection, with the understanding that the meta-data applies to all the reads in the - collection. Meta-data can be used to store information about when the sample was sequenced, - or other information of interest. The key-value pair format is sufficiently flexible to - accomodate a variety of needs. The following keys are pre-defined. Please use pre-defined - keys so that automated tools can use metadata in relatively standard way. Please note that - some keys provide a format for the value. This format should also be followed to garantee - that meta data can be used computationally in fully automatic manner. - - key="sequencing-run-start-date" value="MM/DD/YYYY" Used to record when the sequencing run - was initiated on the instrument. Can be used to detect batch effect in a large set of samples. - key="platform" value="<free-text>". Value is free text, but the following terms are pre-defined. - Illumina GaIIx - Illumina HiSeq 1000 - Illumina HiSeq 2000 - Helicos Heliscope - LifeTech 5500 SOLiD - LifeTech 5500xl SOLiD - Roche 454 GS FLX Ti - - key="organism" value="species name" - Since Goby 1.9.1 - */ - repeated MetaData meta_data = 25; - -} -/* - A message to store a key/value pair and represent metadata about reads. - Since Goby 1.9.1 - */ -message MetaData { - /* - Provides the key. See examples in the documentation of meta_data for ReadEntry. - */ - required string key=1; - /* - Describes the value associated with the key. See examples in the documentation of meta_data for ReadEntry. - */ - required string value=2; -} diff --git a/sci-biology/goby-cpp/files/goby-cpp-2.0.1-underlinking.patch b/sci-biology/goby-cpp/files/goby-cpp-2.0.1-underlinking.patch deleted file mode 100644 index 415785466af7..000000000000 --- a/sci-biology/goby-cpp/files/goby-cpp-2.0.1-underlinking.patch +++ /dev/null @@ -1,16 +0,0 @@ - src/Makefile.am | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/Makefile.am b/src/Makefile.am -index 1033382..33ca906 100644 ---- a/src/Makefile.am -+++ b/src/Makefile.am -@@ -84,7 +84,7 @@ GobyReadsStats_LDADD = libgoby.la ${BOOST_LDFLAGS} ${BOOST_SYSTEM_LIB} ${BOOST_D - GobyReadsStats_SOURCES = \ - GobyReadsStats.cc - --GobyFastaToCompact_LDADD = libgoby.la ${BOOST_LDFLAGS} ${BOOST_SYSTEM_LIB} ${BOOST_DATE_TIME_LIB} ${BOOST_FILESYSTEM_LIB} ${BOOST_PROGRAM_OPTIONS_LIB} -+GobyFastaToCompact_LDADD = libgoby.la ${BOOST_LDFLAGS} ${BOOST_SYSTEM_LIB} ${BOOST_DATE_TIME_LIB} ${BOOST_FILESYSTEM_LIB} ${BOOST_PROGRAM_OPTIONS_LIB} -lz - GobyFastaToCompact_SOURCES = \ - GobyFastaToCompact.cc - diff --git a/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild b/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild index fcf8971fceb0..e74dd6ecede3 100644 --- a/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild +++ b/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild @@ -1,4 +1,4 @@ -# Copyright 1999-2015 Gentoo Foundation +# Copyright 1999-2017 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 EAPI=5 @@ -9,7 +9,8 @@ inherit autotools-utils DESCRIPTION="A DNA sequencing data management framework - C/C++ API" HOMEPAGE="http://campagnelab.org/software/goby/" -SRC_URI="http://chagall.med.cornell.edu/goby/releases/archive/release-goby_${PV}/goby_${PV}-cpp.zip" +SRC_URI="http://chagall.med.cornell.edu/goby/releases/archive/release-goby_${PV}/goby_${PV}-cpp.zip + https://dev.gentoo.org/~mgorny/dist/${P}-files.tar.bz2" LICENSE="GPL-3" SLOT="0" @@ -24,7 +25,7 @@ RDEPEND="${DEPEND}" S="${WORKDIR}/${PV}/cpp" PATCHES=( - "${FILESDIR}"/${P}-underlinking.patch + "${WORKDIR}"/${P}-files/${P}-underlinking.patch ) src_prepare() { @@ -33,7 +34,7 @@ src_prepare() { -i src/Makefile.am || die pushd src/goby > /dev/null || die - cp "${FILESDIR}"/*.proto . || die + cp "${WORKDIR}"/${P}-files/*.proto . || die protoc --cpp_out=. *.proto || die popd > /dev/null || die |