3
I am working on an academic project in which I need to analyze the text of 25 selected videos from various YouTube channels. My advisor gave me the script he uses for this so that I could adapt it to my own videos, but when I run it to the end, the error from the title appears: 'NA' does not exist in current working directory ...
The code follows:
# Session setup: attach the packages used by the script and configure the
# Python backend used by spacyr. The order of the calls is kept as-is because
# later attachments mask earlier ones and the Python interpreter is selected
# before spaCy is initialized.
library(abjutils)
library(tidytext)
library(reticulate)
# NOTE(review): this path has no drive letter or leading slash, so it is a
# relative path as written -- confirm it points at the intended interpreter.
reticulate :: use_python("users/agnes/anaconda3/python")
library(spacyr)
# Initialize spaCy with the "pt_core_news_sm" model.
spacy_initialize("pt_core_news_sm")
library(tidyverse)
library(magrittr)
library(stm)
library(tm)
library(ggridges)
library(formattable)
#library(subtools)
# A large scipen makes R prefer fixed notation over scientific notation
# when printing numbers.
options(scipen = 999)
## Preparing the commands used to download the captions
# Basic metadata fields to embed in the youtube-dl output template
fields_raw <- c(
  "id", "title", "alt_title", "creator", "release_date",
  "timestamp", "upload_date", "duration", "view_count",
  "like_count", "dislike_count", "comment_count"
)
# Format the fields: wrap each name as "%(name)s", join them with "&&&" as
# the field separator, and surround the whole template with double quotes.
# paste0() is vectorized, so no element-wise map is needed.
fields <- paste0(
  '"',
  paste0("%(", fields_raw, ")s", collapse = "&&&"),
  '"'
)
channel_url <- "https://www.youtube.com/watch?v=rmZv19Iylu4"
# Build the youtube-dl query (command): skip the video itself and write only
# the automatic Portuguese subtitles, using `fields` as the output template.
cmd_ytdl <- str_glue("youtube-dl -o {fields} -i -v -w --skip-download --write-auto-sub --sub-lang pt {channel_url}")
view(cmd_ytdl)
# Prepend the target directory so the captions are written there
pasta_captions <- "C:/Users/agnes/Documents"
fs::dir_create(pasta_captions)
cmd <- str_glue("cd {pasta_captions} && {cmd_ytdl}")
# Actually run the download. Without this step no caption file is ever
# created, the listing below is empty, and reading the first element (NA)
# fails with "'NA' does not exist in current working directory".
# The command relies on `cd ... &&`, so it must go through a command
# interpreter: shell() on Windows, system() elsewhere.
status <- if (.Platform$OS.type == "windows") {
  shell(cmd)
} else {
  system(cmd)
}
if (!identical(as.integer(status), 0L)) {
  warning(
    "youtube-dl exited with status ", status,
    "; the caption download may be incomplete.",
    call. = FALSE
  )
}
# `pattern` is a regular expression, not a glob: match names ending in ".vtt"
arquivos_captions <- dir(pasta_captions, pattern = "\\.vtt$", full.names = TRUE)
if (length(arquivos_captions) == 0) {
  stop(
    "No .vtt caption files found in '", pasta_captions,
    "'. Check that youtube-dl is installed and that the download succeeded.",
    call. = FALSE
  )
}
amostra <- arquivos_captions[1]
# Read at most the first 12 lines (no NA padding when the file is shorter)
read_lines(amostra, n_max = 12)