Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ OBJS := $(SRCS:.c=.o)
# directories
CURRENT_DIR := $(shell pwd)

GXX := gcc
CXXFLAGS = -O3 -Wall -Wextra -Wpedantic
GXX ?= gcc
CXXFLAGS = -O3 -Wall -Wextra -Wpedantic -march=native

# object files that need lcptools
LCPTOOLS_CXXFLAGS := -I$(CURRENT_DIR)/lcptools/include
Expand Down
3 changes: 2 additions & 1 deletion fa_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,4 +399,5 @@ void print_ref_seqs(const struct ref_seq *seqs, int is_rgfa, FILE *out) {
fprintf(out, "\t*\n");
}
}
}
}

3 changes: 2 additions & 1 deletion fa_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,5 @@ void read_fasta(struct opt_arg *args, struct ref_seq *seqs);
*/
void print_ref_seqs(const struct ref_seq *seqs, int is_rgfa, FILE *out);

#endif
#endif

3 changes: 2 additions & 1 deletion lbdg.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,5 @@ void lbdg_print_ref_seq(struct ref_seq *seqs, FILE *out) {
}

free(bit_arr);
}
}

3 changes: 2 additions & 1 deletion lbdg.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@
*/
void lbdg_print_ref_seq(struct ref_seq *seqs, FILE *out);

#endif
#endif

3 changes: 2 additions & 1 deletion lcpan.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,5 @@ int main(int argc, char* argv[]) {
free_ref_seq(&seqs);

return 0;
}
}

12 changes: 6 additions & 6 deletions opt_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ int summarize(struct opt_arg *args) {
return 1;
}

void printOptions() {
void printOptions(void) {
fprintf(stderr, "[Options]:\n");
fprintf(stderr, "\t--ref | -r Reference FASTA File. (.fai should be present)\n");
fprintf(stderr, "\t--vcf | -v VCF File.\n");
Expand All @@ -44,7 +44,7 @@ void printOptions() {
fprintf(stderr, "\t--verbose Verbose [Default: false]\n");
}

void printUsage() {
void printUsage(void) {
fprintf(stderr, "Usage: ./lcpan [PROGRAM] [OPTIONS]\n\n");
fprintf(stderr, "[PROGRAM]: \n");
fprintf(stderr, "\t-vg: Uses a variation graph-based approach.\n");
Expand Down Expand Up @@ -207,7 +207,7 @@ void parse_opts(int argc, char* argv[], struct opt_arg *args) {
validate_file(args->vcf_path, "vcf");
}

char *fai_path = malloc(strlen(args->fasta_path)+5);
char *fai_path = (char *) malloc(strlen(args->fasta_path)+5);
if (fai_path == NULL) {
fprintf(stderr, "[ERROR] Memory allocation failed\n");
exit(EXIT_FAILURE);
Expand All @@ -225,14 +225,14 @@ void parse_opts(int argc, char* argv[], struct opt_arg *args) {
}
} else {
if (args->is_rgfa) {
args->gfa_path = malloc(strlen(args->prefix)+6);
args->gfa_path = (char *) malloc(strlen(args->prefix)+6);
if (!args->gfa_path) {
fprintf(stderr, "[ERROR] malloc failed");
exit(EXIT_FAILURE);
}
snprintf(args->gfa_path, strlen(args->prefix)+6, "%s.rgfa", args->prefix);
} else {
args->gfa_path = malloc(strlen(args->prefix)+5);
args->gfa_path = (char *) malloc(strlen(args->prefix)+5);
if (!args->gfa_path) {
fprintf(stderr, "[ERROR] malloc failed");
exit(EXIT_FAILURE);
Expand All @@ -246,4 +246,4 @@ void parse_opts(int argc, char* argv[], struct opt_arg *args) {
}

(void)(args->verbose && summarize(args));
}
}
3 changes: 2 additions & 1 deletion struct_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,5 @@ struct t_arg {
int *exit_signal;
};

#endif
#endif

6 changes: 3 additions & 3 deletions tpool.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct tpool_work *tpool_work_create(thread_func_t func, void *arg) {
if (func == NULL)
return NULL;

work = malloc(sizeof(struct tpool_work));
work = (struct tpool_work *) malloc(sizeof(struct tpool_work));
work->func = func;
work->arg = arg;
work->next = NULL;
Expand Down Expand Up @@ -63,7 +63,7 @@ struct tpool_work *tpool_work_get(struct tpool *tm) {
* @param arg Pointer to the thread pool structure.
*/
void* tpool_worker(void *arg) {
struct tpool *tm = arg;
struct tpool *tm = (struct tpool *) arg;
struct tpool_work *work;

while (1) {
Expand Down Expand Up @@ -106,7 +106,7 @@ struct tpool *tpool_create(size_t num) {
if (num == 0)
num = 1;

tm = calloc(1, sizeof(*tm));
tm = (struct tpool *) calloc(1, sizeof(*tm));
tm->thread_cnt = num;

pthread_mutex_init(&(tm->work_mutex), NULL);
Expand Down
8 changes: 7 additions & 1 deletion utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
#include <string.h>
#include <pthread.h>

/*
 * COLITERAL(type){...} builds an unnamed temporary of struct `type` with one
 * spelling that is accepted by both C and C++ compilers:
 *   - C:   expands to (struct type){...}, i.e. a compound literal;
 *   - C++: expands to type{...}, i.e. brace initialization of a temporary.
 * Pass the bare struct tag, without the `struct` keyword.
 */
#ifndef __cplusplus
#define COLITERAL(type) (struct type)
#else
#define COLITERAL(type) type
#endif

/*
 * Two-argument minimum / maximum.
 * Both arguments are fully parenthesized, so any expression may be passed,
 * but the selected argument is expanded (and therefore evaluated) twice:
 * do not pass expressions with side effects such as MIN(i++, j).
 */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

Expand Down Expand Up @@ -182,4 +188,4 @@ void refine_seq(struct lps *str, int no_overlap);
*/
void print_path(const struct ref_seq *seqs, FILE *out);

#endif
#endif
38 changes: 18 additions & 20 deletions vg.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ int add_elem_2_vgd(struct vg_data *vgd, uint64_t *arr, int size, uint64_t start,
}
while (i < size && (uint32_t)(arr[i]) < end) {
check_vg_data(vgd); // check if there is a space to add element
vgd->data[vgd->size] = (struct vg_data_element){INCOMING, arr[i] >> 32, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF & arr[i], NULL, NULL, 0}; // order not matter here
vgd->data[vgd->size] = COLITERAL(vg_data_element){INCOMING, arr[i] >> 32, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF & arr[i], NULL, NULL, 0}; // order not matter here
vgd->size++;
i++;
}
Expand Down Expand Up @@ -302,21 +302,21 @@ void vg_read_vcf_thd(void *args) {
uint64_t prev_segment_id = vgd->prev_id;
for (int k=0; k<segment_count-1; k++) {
uint64_t segment_id = set_id(vgd, split_points[k+1], &(t_args->core_id_index));
segments[k] = (struct simple_core){segment_id, split_points[k], split_points[k+1]};
segments[k] = COLITERAL(simple_core){segment_id, split_points[k], split_points[k+1]};
print_seq(segment_id, seq+split_points[k], split_points[k+1]-split_points[k], seq_name, split_points[k], 0, t_args->is_rgfa, t_args->out1);
print_link(prev_segment_id, '+', segment_id, '+', 0, t_args->out2);
prev_segment_id = segment_id;
t_args->seqs->chrs[vgd->chr_idx].ids[vgd->core_idx][k] = segment_id;
}
segments[segment_count-1] = (struct simple_core){vgd->curr_id, split_points[segment_count-1], curr_core->end};
segments[segment_count-1] = COLITERAL(simple_core){vgd->curr_id, split_points[segment_count-1], curr_core->end};
print_seq(vgd->curr_id, seq+split_points[segment_count-1], curr_core->end-split_points[segment_count-1], seq_name, split_points[segment_count-1], 0, t_args->is_rgfa, t_args->out1);
print_link(prev_segment_id, '+', vgd->curr_id, '+', 0, t_args->out2);
t_args->seqs->chrs[vgd->chr_idx].ids[vgd->core_idx][segment_count-1] = 0;
} else {
const char *seq = t_args->seqs->chrs[vgd->chr_idx].seq;
const char *seq_name = t_args->seqs->chrs[vgd->chr_idx].seq_name;
const struct simple_core *curr_core = &(t_args->seqs->chrs[vgd->chr_idx].cores[vgd->core_idx]);
segments[0] = (struct simple_core){vgd->curr_id, curr_core->start, curr_core->end};
segments[0] = COLITERAL(simple_core){vgd->curr_id, curr_core->start, curr_core->end};
print_seq(vgd->curr_id, seq+curr_core->start, curr_core->end-curr_core->start, seq_name, curr_core->start, 0, t_args->is_rgfa, t_args->out1);
print_link(vgd->prev_id, '+', vgd->curr_id, '+', 0, t_args->out2);
t_args->seqs->chrs[vgd->chr_idx].ids[vgd->core_idx] = NULL;
Expand Down Expand Up @@ -499,8 +499,6 @@ void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
exit(EXIT_FAILURE);
}

int line_count = 0;

int rem_vars_capacity = 256;
int rem_vars_size = 0;
uint64_t *rem_vars = (uint64_t *)malloc(rem_vars_capacity * sizeof(uint64_t)); // id+end
Expand Down Expand Up @@ -532,7 +530,6 @@ void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
// validate `line`
if (skip_line || len < 2 || line[0] == '#') continue;
if (line[len - 1] == '\n') { line[len - 1] = '\0'; len--; }
line_count++;

// parse the `line`
char *chrom, *index, *id, *ref, *alt;
Expand Down Expand Up @@ -665,12 +662,12 @@ void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
uint64_t ref_token_len = strlen(ref_token);
if (check_vg_data(vgd)) {
if (offset + ref_token_len < curr_chr->cores[core_idx].end) { // DEL inside
vgd->data[vgd->size] = (struct vg_data_element){IN_DEL, 0, offset+1, offset+ref_token_len, NULL, NULL, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){IN_DEL, 0, offset+1, offset+ref_token_len, NULL, NULL, order};
vgd->size++;
} else if (offset + 1 < curr_chr->cores[core_idx].end) { // it starts inside
rem_vars_size = add_elem_2_rem_arr(&rem_vars, rem_vars_size, rem_vars_capacity, args->core_id_index, offset+ref_token_len);
if (rem_vars_size > rem_vars_capacity) rem_vars_capacity *= 2;
vgd->data[vgd->size] = (struct vg_data_element){OUT_DEL, args->core_id_index, offset+1, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){OUT_DEL, args->core_id_index, offset+1, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order};
vgd->size++;
args->core_id_index++;
} else {
Expand All @@ -691,12 +688,12 @@ void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
if (1 == ref_len && 1 == alt_token_len) { // SNP
if (offset + 1 < curr_chr->cores[core_idx].end) {
print_seq_vg(args->core_id_index, alt_token, 1, id, order, offset, 1, args->is_rgfa, out_segment);
vgd->data[vgd->size] = (struct vg_data_element){IN_SNP, args->core_id_index, offset, offset+1, NULL, NULL, order}; // id assigned for segment
vgd->data[vgd->size] = COLITERAL(vg_data_element){IN_SNP, args->core_id_index, offset, offset+1, NULL, NULL, order}; // id assigned for segment
} else {
rem_vars_size = add_elem_2_rem_arr(&rem_vars, rem_vars_size, rem_vars_capacity, args->core_id_index, offset+1);
if (rem_vars_size > rem_vars_capacity) rem_vars_capacity *= 2;
print_seq_vg(args->core_id_index, alt_token, 1, id, order, offset, 1, args->is_rgfa, out_segment);
vgd->data[vgd->size] = (struct vg_data_element){OUT_SNP, args->core_id_index, offset, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order}; // id assigned for segment
vgd->data[vgd->size] = COLITERAL(vg_data_element){OUT_SNP, args->core_id_index, offset, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order}; // id assigned for segment
}
vgd->size++;
args->core_id_index++;
Expand All @@ -705,23 +702,23 @@ void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
if (alt_token_len / 2 < curr_chr->cores[core_idx].end - curr_chr->cores[core_idx].start) {
print_seq_vg(args->core_id_index, alt_token+1, alt_token_len-1, id, order, offset, 1, args->is_rgfa, out_segment);
if (offset + 1 < curr_chr->cores[core_idx].end) {
vgd->data[vgd->size] = (struct vg_data_element){IN_INS, args->core_id_index, offset+1, offset+1, NULL, NULL, order}; // id assigned for segment
vgd->data[vgd->size] = COLITERAL(vg_data_element){IN_INS, args->core_id_index, offset+1, offset+1, NULL, NULL, order}; // id assigned for segment
} else {
rem_vars_size = add_elem_2_rem_arr(&rem_vars, rem_vars_size, rem_vars_capacity, args->core_id_index, offset+1);
if (rem_vars_size > rem_vars_capacity) rem_vars_capacity *= 2;
vgd->data[vgd->size] = (struct vg_data_element){OUT_INS, args->core_id_index, offset+1, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order}; // id assigned for segment
vgd->data[vgd->size] = COLITERAL(vg_data_element){OUT_INS, args->core_id_index, offset+1, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order}; // id assigned for segment
}
vgd->size++;
args->core_id_index++;
} else { // Large INS, to be processed with LCP
char *alt_token_copy = strdup(alt_token);
char *seq_id = strdup(id);
if (offset + 1 < curr_chr->cores[core_idx].end) { // if inside of the lcp core
vgd->data[vgd->size] = (struct vg_data_element){IN_INS_SV, args->core_id_index, offset+1, offset+1, alt_token_copy, seq_id, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){IN_INS_SV, args->core_id_index, offset+1, offset+1, alt_token_copy, seq_id, order};
} else { // if in the edge of the end of the lcp core
rem_vars_size = add_elem_2_rem_arr(&rem_vars, rem_vars_size, rem_vars_capacity, args->core_id_index, offset+1);
if (rem_vars_size > rem_vars_capacity) rem_vars_capacity *= 2;
vgd->data[vgd->size] = (struct vg_data_element){OUT_INS_SV, args->core_id_index, offset+1, 0xFFFFFFFFFFFFFFFF, alt_token_copy, seq_id, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){OUT_INS_SV, args->core_id_index, offset+1, 0xFFFFFFFFFFFFFFFF, alt_token_copy, seq_id, order};
}
vgd->size++;
args->core_id_index++;
Expand All @@ -730,23 +727,23 @@ void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
if (alt_token_len / 2 < curr_chr->cores[core_idx].end - curr_chr->cores[core_idx].start) { // alteration, simply print the underling string
print_seq_vg(args->core_id_index, alt_token, alt_token_len, id, order, offset, 1, args->is_rgfa, out_segment);
if (offset + ref_len < curr_chr->cores[core_idx].end) {
vgd->data[vgd->size] = (struct vg_data_element){IN_ALT, args->core_id_index, offset, offset+ref_len, NULL, NULL, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){IN_ALT, args->core_id_index, offset, offset+ref_len, NULL, NULL, order};
} else {
rem_vars_size = add_elem_2_rem_arr(&rem_vars, rem_vars_size, rem_vars_capacity, args->core_id_index, offset+ref_len);
if (rem_vars_size > rem_vars_capacity) rem_vars_capacity *= 2;
vgd->data[vgd->size] = (struct vg_data_element){OUT_ALT, args->core_id_index, offset, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){OUT_ALT, args->core_id_index, offset, 0xFFFFFFFFFFFFFFFF, NULL, NULL, order};
}
vgd->size++;
args->core_id_index++;
} else { // check if it the alt_token requires LCP processing
char *alt_token_copy = strdup(alt_token);
char *seq_id = strdup(id);
if (offset + ref_len < curr_chr->cores[core_idx].end) {
vgd->data[vgd->size] = (struct vg_data_element){IN_ALT_SV, args->core_id_index, offset, offset+ref_len, alt_token_copy, seq_id, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){IN_ALT_SV, args->core_id_index, offset, offset+ref_len, alt_token_copy, seq_id, order};
} else {
rem_vars_size = add_elem_2_rem_arr(&rem_vars, rem_vars_size, rem_vars_capacity, args->core_id_index, offset+ref_len);
if (rem_vars_size > rem_vars_capacity) rem_vars_capacity *= 2;
vgd->data[vgd->size] = (struct vg_data_element){OUT_ALT_SV, args->core_id_index, offset, 0xFFFFFFFFFFFFFFFF, alt_token_copy, seq_id, order};
vgd->data[vgd->size] = COLITERAL(vg_data_element){OUT_ALT_SV, args->core_id_index, offset, 0xFFFFFFFFFFFFFFFF, alt_token_copy, seq_id, order};
}
vgd->size++;
args->core_id_index++;
Expand Down Expand Up @@ -856,4 +853,5 @@ void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
fclose(out_path);
fclose(out_segment);
fclose(out_link);
}
}

3 changes: 2 additions & 1 deletion vg.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@
*/
void vg_read_vcf(struct opt_arg *args, struct ref_seq *seqs);

#endif
#endif

3 changes: 2 additions & 1 deletion vgx.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,4 +414,5 @@ void vgx_read_vcf(struct opt_arg *args, struct ref_seq *seqs) {
fclose(out_log);

printf("[INFO] Ended processing %d lines. \n", line_count);
}
}

3 changes: 2 additions & 1 deletion vgx.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@
*/
void vgx_read_vcf(struct opt_arg *args, struct ref_seq *seqs);

#endif
#endif