}))
# Print the toFlip table as it stands before this point.
toFlip
# Preview (result is printed, not stored): list the BAM directory and split
# each file name with one regex. as_tibble() puts the names in a column called
# `value`. Capture 1 ("ERX"/"SRX" followed by digits) becomes `id`; capture 2
# (a run of letters drawn from "experimentcontrol") becomes `cond`. gsub()
# returns the name unchanged when the pattern does not match.
list.files(path = "data-raw/RMapDB_generate/bams/") %>%
as_tibble() %>%
mutate(id = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2")) %>%
# realID, computed row by row: rows tagged "experiment" keep their own id;
# every other row takes rmap$control from the rmap row whose `experiment`
# equals the id. `rmap` is not defined in this part of the file.
mutate(realID = map_chr(seq(id), function(i) {
cond <- .data$cond[i]
id <- .data$id[i]
ifelse(cond == "experiment", id, rmap$control[rmap$experiment == id])
}))
# Same pipeline again, this time stored as toFlip.
toFlip <- list.files(path = "data-raw/RMapDB_generate/bams/") %>%
as_tibble() %>%
mutate(id = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2")) %>%
mutate(realID = map_chr(seq(id), function(i) {
cond <- .data$cond[i]
id <- .data$id[i]
ifelse(cond == "experiment", id, rmap$control[rmap$experiment == id])
}))
# Attach the parsed file rows to rmap, matching rmap$id against toFlip$realID.
# NOTE(review): assumes rmap has an `id` column — TODO confirm.
left_join(rmap, toFlip, by=c("id" = "realID"))
# Rebuild toFlip with the parsed accession stored as `idOrig`.
#
# Fix: the realID step still referred to the previous column name `id`, which
# no longer exists once the column is called `idOrig`; both the index vector
# and the per-row lookup now read `idOrig`.
toFlip <- list.files(path = "data-raw/RMapDB_generate/bams/") %>%
  as_tibble() %>%
  mutate(
    idOrig = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
    cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2")
  ) %>%
  mutate(realID = map_chr(seq_along(idOrig), function(i) {
    cond <- .data$cond[i]
    id <- .data$idOrig[i]
    # Rows tagged "experiment" keep their own accession; any other row takes
    # rmap$control from the rmap row whose `experiment` equals the accession.
    ifelse(cond == "experiment", id, rmap$control[rmap$experiment == id])
  }))
# Attach the parsed file rows to rmap, matching rmap$id against toFlip$realID.
# NOTE(review): assumes rmap has an `id` column — TODO confirm.
left_join(rmap, toFlip, by = c("id" = "realID"))
# Preview (printed, not stored): accession (`idOrig`) and condition (`cond`)
# parsed from every file name in the BAM directory.
list.files(path = "data-raw/RMapDB_generate/bams/") %>%
  as_tibble() %>%
  mutate(
    idOrig = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
    cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2")
  )

# Same parse, stored as toFlip with realID added.
#
# Fix: the index vector was built from `id`, a column that does not exist in
# this table (the accession column is `idOrig`).
toFlip <- list.files(path = "data-raw/RMapDB_generate/bams/") %>%
  as_tibble() %>%
  mutate(
    idOrig = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
    cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2")
  ) %>%
  mutate(realID = map_chr(seq_along(idOrig), function(i) {
    cond <- .data$cond[i]
    id <- .data$idOrig[i]
    # Rows tagged "experiment" keep their own accession; any other row takes
    # rmap$control from the rmap row whose `experiment` equals the accession.
    ifelse(cond == "experiment", id, rmap$control[rmap$experiment == id])
  }))
# Preview (printed, not stored): accession and condition parsed from each
# file name in the BAM directory.
as_tibble(list.files(path = "data-raw/RMapDB_generate/bams/")) %>%
  mutate(
    idOrig = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1", x = value),
    cond = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value)
  )

# Same parse kept as toFlip, plus realID: the row's own accession when it is
# tagged "experiment", otherwise rmap$control of the rmap row whose
# `experiment` equals that accession.
toFlip <- as_tibble(list.files(path = "data-raw/RMapDB_generate/bams/")) %>%
  mutate(
    idOrig = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1", x = value),
    cond = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value)
  ) %>%
  mutate(realID = map_chr(seq_along(idOrig), function(k) {
    condition <- .data$cond[k]
    accession <- .data$idOrig[k]
    ifelse(condition == "experiment", accession, rmap$control[rmap$experiment == accession])
  }))
toFlip

# Join attempts: first keyed on rmap$id, then (after looking at rmap) keyed on
# rmap$experiment; the latter result is kept as dd and opened in the viewer.
rmap %>% left_join(toFlip, by = c(id = "realID"))
rmap
rmap %>% left_join(toFlip, by = c(experiment = "realID"))
dd <- rmap %>% left_join(toFlip, by = c(experiment = "realID"))
View(dd)
# Parse accession, condition and genome out of every BAM file name.
# The three-group pattern reads <ERX|SRX id>_<anything>.<field>.<condition>.bam;
# the middle field (capture group 2) is what gets stored as `genome`.
#
# Fix: the first draft of this step filled `genome` from capture group 3,
# which is the condition, not the middle field. The three copies of the same
# pipeline are collapsed into one stored result that is then inspected.
dd <- list.files(path = "data-raw/RMapDB_generate/bams/") %>%
  as_tibble() %>%
  mutate(
    idOrig = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
    cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2"),
    genome = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([a-zA-Z0-9]+)\\.([experimentcontrol]+)\\.bam", replacement = "\\2")
  )
dd
dd$genome
# Parse every BAM name into accession, condition and genome, then derive
# realID: the row's own accession when tagged "experiment", otherwise
# rmap$control of the rmap row whose `experiment` equals that accession.
toFlip <- as_tibble(list.files(path = "data-raw/RMapDB_generate/bams/")) %>%
  mutate(
    idOrig = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1", x = value),
    cond = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value),
    genome = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([a-zA-Z0-9]+)\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value)
  ) %>%
  mutate(realID = map_chr(seq_along(idOrig), function(k) {
    condition <- .data$cond[k]
    accession <- .data$idOrig[k]
    ifelse(condition == "experiment", accession, rmap$control[rmap$experiment == accession])
  }))

# First join matches on the accession only; the second also requires the
# genome columns of both tables to agree. Each result is opened in the viewer.
dd <- left_join(rmap, toFlip, by = c(experiment = "realID"))
View(dd)
dd <- left_join(rmap, toFlip, by = c(experiment = "realID", genome = "genome"))
View(dd)
# Rename table: for every joined row, the current path of the BAM (`oldName`)
# and the target path "<experiment>_<genome>.bam" (`newName`), both inside
# the bams directory. The table is printed, then opened in the viewer.
dd <- rmap %>%
  left_join(toFlip, by = c(experiment = "realID", genome = "genome")) %>%
  mutate(
    oldName = paste0("data-raw/RMapDB_generate/bams/", value),
    newName = paste0("data-raw/RMapDB_generate/bams/", experiment, "_", genome, ".bam")
  )
dd
View(dd)
# Dry run of the BAM rename: build the old -> new path table and report each
# planned move without touching the file system.
#
# Fixes over the earlier drafts of this step:
#   * the joined table was piped straight into map()/lapply()/map_chr(), which
#     makes the data frame the object being iterated and the index vector the
#     function argument;
#   * `.data` was used outside a dplyr verb, where the pronoun is unavailable;
#   * the progress message pasted the whole index vector instead of the total;
#   * `i` was used before it was assigned;
#   * one map_chr() callback returned NULL, which is not a string.
rename_plan <- left_join(rmap, toFlip, by = c("experiment" = "realID", "genome")) %>%
  mutate(
    oldName = paste0("data-raw/RMapDB_generate/bams/", value),
    newName = paste0("data-raw/RMapDB_generate/bams/", experiment, "_", genome, ".bam")
  )
n_planned <- nrow(rename_plan)
for (i in seq_len(n_planned)) {
  message(i, "/", n_planned)
  # Dry run only: the real move stays disabled here.
  # file.rename(rename_plan$oldName[i], rename_plan$newName[i])
  print(paste0(rename_plan$oldName[i], " -> ", rename_plan$newName[i]))
}
# Build the rename table and move every BAM to "<experiment>_<genome>.bam"
# inside the bams directory.
#
# Changes from the earlier drafts of this step:
#   * rmap rows with no matching file leave the left join with value = NA;
#     they are skipped so no rename of ".../NA" is attempted;
#   * the first draft's progress message pasted the whole index vector; the
#     total count is shown instead;
#   * file.rename() signals failure by returning FALSE; results are collected
#     and failures are surfaced in a single warning;
#   * the repeated dry-run loops are dropped; each move is printed as it runs.
dd <- left_join(rmap, toFlip, by = c("experiment" = "realID", "genome")) %>%
  mutate(
    oldName = paste0("data-raw/RMapDB_generate/bams/", value),
    newName = paste0("data-raw/RMapDB_generate/bams/", experiment, "_", genome, ".bam")
  )

# dd keeps every joined row; only rows that point at a real file are moved.
to_move <- dd[!is.na(dd$value), ]
n_move <- nrow(to_move)
renamed <- logical(n_move)
for (i in seq_len(n_move)) {
  message(i, "/", n_move)
  renamed[i] <- file.rename(to_move$oldName[i], to_move$newName[i])
  print(paste0(to_move$oldName[i], " -> ", to_move$newName[i]))
}
if (any(!renamed)) {
  warning(sum(!renamed), " of ", n_move, " files could not be renamed", call. = FALSE)
}
# Show the current content of the BAM directory as a one-column tibble.
list.files(path = "data-raw/RMapDB_generate/bams/") %>%
as_tibble()
# Only names matching the regex ".+_.+_.+", i.e. names containing at least two
# underscores with characters around them.
list.files(path = "data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+")
# Preview: parse accession, condition and genome for just those names.
list.files(path = "data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+") %>%
as_tibble() %>%
mutate(idOrig = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2"),
genome =  gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([a-zA-Z0-9]+)\\.([experimentcontrol]+)\\.bam", replacement = "\\2"))
# Open rmap (defined outside this part of the file) in the viewer.
View(rmap)
# still some to fix
# Rebuild toFlip from the two-underscore names only. realID is the row's own
# accession when tagged "experiment", otherwise rmap$control of the rmap row
# whose `experiment` equals that accession.
toFlip <- list.files(path = "data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+") %>%
as_tibble() %>%
mutate(idOrig = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1"),
cond = gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2"),
genome =  gsub(value, pattern = "([ES]{1}RX[0-9]+)_.+\\.([a-zA-Z0-9]+)\\.([experimentcontrol]+)\\.bam", replacement = "\\2")) %>%
mutate(realID = map_chr(seq(idOrig), function(i) {
cond <- .data$cond[i]
id <- .data$idOrig[i]
ifelse(cond == "experiment", id, rmap$control[rmap$experiment == id])
}))
# Print the rebuilt table.
toFlip
# still some to fix
# Names with at least two underscores, parsed into accession, condition and
# genome (no realID yet); stored, printed, then evaluated once more unassigned.
toFlip <- as_tibble(list.files(path = "data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+")) %>%
  mutate(
    idOrig = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1", x = value),
    cond = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value),
    genome = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([a-zA-Z0-9]+)\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value)
  )
toFlip
as_tibble(list.files(path = "data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+")) %>%
  mutate(
    idOrig = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1", x = value),
    cond = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value),
    genome = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([a-zA-Z0-9]+)\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value)
  )

# still some to fix
# Full rebuild including realID: the row's own accession when tagged
# "experiment", otherwise rmap$control of the rmap row whose `experiment`
# equals that accession.
toFlip <- as_tibble(list.files(path = "data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+")) %>%
  mutate(
    idOrig = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\1", x = value),
    cond = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value),
    genome = gsub(pattern = "([ES]{1}RX[0-9]+)_.+\\.([a-zA-Z0-9]+)\\.([experimentcontrol]+)\\.bam", replacement = "\\2", x = value)
  ) %>%
  mutate(realID = map_chr(seq_along(idOrig), function(k) {
    condition <- .data$cond[k]
    accession <- .data$idOrig[k]
    ifelse(condition == "experiment", accession, rmap$control[rmap$experiment == accession])
  }))
View(toFlip)

# Compare join flavours against rmap: left and inner on accession + genome,
# right on accession only.
rmap %>% left_join(toFlip, by = c(experiment = "realID", genome = "genome"))
rmap %>% inner_join(toFlip, by = c(experiment = "realID", genome = "genome"))
rmap %>% right_join(toFlip, by = c(experiment = "realID"))
# still some to fix
# Bare names of the files that still have two or more underscores, shown for
# inspection before anything is deleted.
toDel <- list.files("data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+")
print(toDel)
# still some to fix
# Same listing with the directory prefixed, so the paths can be handed to
# file.remove(); its logical result is printed.
toDel <- list.files("data-raw/RMapDB_generate/bams/", pattern = ".+_.+_.+", full.names = TRUE)
file.remove(toDel)
# What remains in the directory, followed by the rmap table.
list.files("data-raw/RMapDB_generate/bams/")
rmap
# Open the reticulate Python REPL (interactive session).
reticulate::repl_python()
# Switch the working directory to the RSeq tests folder before the next
# Python sessions.
setwd("~/projects/RSeq/tests")
reticulate::repl_python()
reticulate::repl_python()
reticulate::repl_python()
# Print `outjson`. It is not defined in this part of the file — presumably it
# was created during one of the Python sessions (TODO confirm).
outjson
reticulate::repl_python()
# Return to the RSeqR project folder; the relative "data-raw/..." paths used
# afterwards are resolved from here.
setwd("~/projects/RSeqR")
# Move files again...
# Sort every file in BAMDIR into a sub-folder named after the part of its
# file name that precedes the last underscore (e.g. "SRX1025890_hg38.bam"
# goes to "<BAMDIR>/SRX1025890/").
#
# Fixes over the earlier drafts of this step:
#   * `print(fn)` ran at top level where `fn` does not exist;
#   * the first rename passed the bare file name as the source, so it was
#     resolved against the working directory instead of BAMDIR;
#   * sapply() was used purely for side effects; vapply() pins the result to
#     one logical per file so failed moves are visible;
#   * existing sub-folders are excluded, so a re-run does not try to move a
#     folder into itself.
BAMDIR <- "data-raw/RMapDB_generate/bam/"
fs <- list.files(BAMDIR)
fs <- fs[!dir.exists(file.path(BAMDIR, fs))]
moved <- vapply(fs, function(fn) {
  id <- gsub(fn, pattern = "(.+)_.+", replacement = "\\1")
  mkdir <- file.path(BAMDIR, id)
  # The folder may already exist from a previous file with the same id.
  dir.create(mkdir, showWarnings = FALSE)
  nfn <- file.path(mkdir, fn)
  ofn <- file.path(BAMDIR, fn)
  file.rename(ofn, nfn)
}, logical(1))
moved
# Write a one-row sample sheet (columns `experiment` and `control`, each a BAM
# file name) to the RSeq test-data folder, addressed relative to the current
# working directory.
tibble(
experiment = "SRX1025890_hg38.bam",
control = "SRX1025893_hg38.bam"
) %>% write_csv("../RSeq/tests/test_data/bam_test_samples_1.csv")
# Interactive Python sessions via reticulate.
reticulate::repl_python()
reticulate::repl_python()
reticulate::repl_python()
reticulate::repl_python()
# Work from the RSeq test-data folder from here on.
setwd("~/projects/RSeq/tests/test_data")
reticulate::repl_python()
# Rewrite the BAM sample sheet in the current directory with two rows; the
# second experiment has no control (NA).
#
# Fixes: an unfinished `tibble(` call in front of list.files() made this
# section unparseable, library(tidyverse) was loaded only after tibble() and
# write_csv() had already been used, and the identical file was written twice.
library(tidyverse)
# Show what is in the current directory before writing.
list.files()
tibble(
  experiment = c("SRX1025890_hg38.bam", "SRX6427717_hg38.bam"),
  control = c("SRX1025893_hg38.bam", NA)
) %>% write_csv("bam_test_samples_1.csv")
# Interactive Python sessions via reticulate.
reticulate::repl_python()
reticulate::repl_python()
# Read the xz-compressed genome table through its home-relative path; the
# result is only printed.
read_tsv("~/projects/RSeq/rseq/src/data/available_genomes.tsv.xz")
reticulate::repl_python()
reticulate::repl_python()
# Same file addressed relative to the current working directory: printed
# once, then stored as av_gen and opened in the viewer.
read_tsv("../../rseq/src/data/available_genomes.tsv.xz")
av_gen <- read_tsv("../../rseq/src/data/available_genomes.tsv.xz")
View(av_gen)
# Reshape the effective-genome-size columns of av_gen into long form and
# export them next to the source table.
#
# Steps:
#   1. keep UCSC_orgID plus every column whose name contains "eff_gen";
#   2. pivot those columns into name/value pairs;
#   3. take the digits in front of the trailing "bp" of each column name as
#      read_length;
#   4. drop `name`, rename `value` to eff_genome_size and put read_length in
#      front of it.
#
# Fixes over the earlier drafts of this step: the column move used reorder(),
# which is not the dplyr verb for repositioning columns (relocate() is); a
# stray empty argument in select() is removed; the repeated previews are
# collapsed into one stored result.
eff_gen_size <- av_gen %>%
  select(UCSC_orgID, contains("eff_gen")) %>%
  pivot_longer(cols = contains("eff_gen")) %>%
  mutate(read_length = as.numeric(gsub(name, pattern = ".+_([0-9]+)bp$", replacement = "\\1"))) %>%
  select(-name, eff_genome_size = value) %>%
  relocate(read_length, .before = eff_genome_size)
eff_gen_size
write_tsv(eff_gen_size, "../../rseq/src/data/eff_gen_size.tsv")
# -f lets xz replace an eff_gen_size.tsv.xz left over from a previous export
# instead of refusing to compress.
system("xz -f ../../rseq/src/data/eff_gen_size.tsv")
reticulate::repl_python()
library(tidyverse)
# Fix: the genome table was first read through a bare relative file name
# before the working directory had been moved to the folder that holds it;
# the directory change now comes first and the file is read once.
setwd("../../rseq/src/data/")
av_gen <- read_tsv("available_genomes.tsv.xz")

# Long form of the effective-genome-size columns: one row per UCSC_orgID and
# "eff_gen" column, with read_length taken from the digits in front of the
# trailing "bp" of the column name.
eff_gen_long <- av_gen %>%
  select(UCSC_orgID, contains("eff_gen")) %>%
  pivot_longer(cols = contains("eff_gen")) %>%
  mutate(read_length = as.numeric(gsub(name, pattern = ".+_([0-9]+)bp$", replacement = "\\1")))

# Inspect the alternative of carrying the last seen size down into missing
# rows. Fix: an earlier draft called fill() without naming a column, which
# fills nothing.
eff_gen_long %>%
  fill(value, .direction = "down")

# Version kept as dd: rows with any missing field are dropped, `name` is
# removed, `value` becomes eff_genome_size and read_length moves in front.
dd <- eff_gen_long %>%
  drop_na() %>%
  select(-name, eff_genome_size = value) %>%
  relocate(read_length, .before = eff_genome_size)
View(dd)
library(tidyverse)
av_gen <- read_tsv("available_genomes.tsv.xz")
# Export the long-form effective genome sizes (rows with missing values
# dropped) as eff_gen_size.tsv in the current directory: UCSC_orgID,
# read_length (digits before the trailing "bp" of the source column name) and
# eff_genome_size.
av_gen %>%
  select(UCSC_orgID, contains("eff_gen")) %>%
  pivot_longer(cols = contains("eff_gen")) %>%
  mutate(read_length = as.numeric(gsub(name, pattern = ".+_([0-9]+)bp$", replacement = "\\1"))) %>%
  drop_na() %>%
  select(-name, eff_genome_size = value) %>%
  relocate(read_length, .before = eff_genome_size) %>%
  write_tsv("eff_gen_size.tsv")
# Fix: xz was run twice, first without and then with -f; the second call can
# only do something if the first one failed. A single forced run replaces any
# existing eff_gen_size.tsv.xz.
system("xz -f eff_gen_size.tsv")
reticulate::repl_python()
reticulate::repl_python()
library(tidyverse)
av_gen <- read_tsv("available_genomes.tsv.xz")
# Long form of the effective-genome-size columns with missing sizes filled
# from the previous row of the same organism.
#
# Fix: fill() was applied to the whole table, so a missing size at the start
# of one UCSC_orgID was filled with the last value of the organism above it.
# Grouping by UCSC_orgID keeps the fill inside each organism.
# NOTE(review): sizes that are missing before the first known value of an
# organism now stay NA in the export — confirm the consumer accepts that.
dd <- av_gen %>%
  select(UCSC_orgID, contains("eff_gen")) %>%
  pivot_longer(cols = contains("eff_gen")) %>%
  mutate(read_length = as.numeric(gsub(name, pattern = ".+_([0-9]+)bp$", replacement = "\\1"))) %>%
  group_by(UCSC_orgID) %>%
  fill(value, .direction = "down") %>%
  ungroup()
dd
View(dd)
# Export: drop `name`, rename `value` to eff_genome_size, put read_length in
# front of it, write the table and compress it (replacing any existing .xz).
dd %>%
  select(-name, eff_genome_size = value) %>%
  relocate(read_length, .before = eff_genome_size) %>%
  write_tsv("eff_gen_size.tsv")
system("xz -f eff_gen_size.tsv")
reticulate::repl_python()
