this post was submitted on 25 Aug 2024
504 points (98.5% liked)

Open Source

31364 readers
130 users here now

All about open source! Feel free to ask questions and to share news and other interesting stuff!

Useful Links

Rules

Related Communities

Community icon from opensource.org, but we are not affiliated with them.

founded 5 years ago
MODERATORS
you are viewing a single comment's thread
view the rest of the comments
[–] [email protected] 6 points 3 months ago

I made a script for grabbing Reddit videos that's been working pretty well for me. It needs Babashka to run: https://babashka.org/

#!/usr/bin/env bb
(require '[clojure.java.shell :refer [sh]]
         '[clojure.string :as string]
         '[cheshire.core :as cheshire]
         '[org.httpkit.client :as http]
         '[clojure.walk :as walk])

(defn http-get
  "Issues a GET request for `url` through the http-kit client, blocks until
  the response promise is delivered, and returns the response's :body."
  [url]
  (let [response (deref (http/get url {}))]
    (:body response)))

(defn find-base-url
  "Derives the directory URL that the media segments live under.

  Walks `data` depth-first and takes the first string that contains \"DASH\",
  then cuts off its trailing `DASH...` path segment together with any query
  string or fragment that follows it. For example both
  `https://host/id/DASH_720.mp4` and
  `https://host/id/DASH_720.mp4?source=fallback` yield `https://host/id/`.

  Returns nil when no string in `data` contains \"DASH\"."
  [data]
  (some-> (->> (tree-seq coll? seq data)
               ;; tree-seq is lazy, so the search stops at the first hit
               (filter #(and (string? %) (string/includes? % "DASH")))
               first)
          ;; Strip the final DASH* segment and anything after it (?query or
          ;; #fragment); stripping only "DASH_<n>.mp4" would leave a query
          ;; string glued onto the base URL.
          (string/replace #"DASH[^/?#]*(?:[?#].*)?$" "")))

(defn find-best-quality
  "Picks the highest-ranked entry from `names`.

  When `audio?` is truthy only names containing \"audio\" (case-insensitive)
  are considered; otherwise those names are excluded. Each candidate is ranked
  by the integer left after removing \".mp4\" and all letters/underscores
  (e.g. \"DASH_720.mp4\" -> 720). Names that leave no parseable integer
  (e.g. \"DASH_audio.mp4\") rank as 0 instead of throwing.

  Returns the name with the greatest rank, or nil when no candidate remains."
  [names audio?]
  (let [audio-name? (fn [n]
                      (string/includes? (string/lower-case n) "audio"))
        rank (fn [n]
               (let [digits (-> n
                                (string/replace #"\.mp4" "")
                                (string/replace #"[a-zA-Z_]" ""))]
                 ;; A name without digits yields "", which parseInt rejects;
                 ;; treat it as the lowest quality rather than crashing.
                 (try
                   (Integer/parseInt digits)
                   (catch NumberFormatException _ 0))))]
    (->> names
         ((if audio? filter remove) audio-name?)
         (sort-by rank)
         last)))

(defn find-parts
  "Resolves the video and audio stream URLs to download.

  Searches `data` for maps that have a :dash_url key (when several exist, the
  last one in depth-first pre-order is used), fetches that manifest with
  `http-get`, collects the text of every <BaseURL> element and selects one
  video and one audio name via `find-best-quality`.

  Returns `[video-url audio-url]`, each built by appending the chosen name to
  `base-url`.

  Throws ex-info when no :dash_url is present in `data` or when the manifest
  request returns no body."
  [base-url data]
  (let [manifest-url (->> (tree-seq coll? seq data)
                          (filter #(and (map? %) (contains? % :dash_url)))
                          last
                          :dash_url)
        _ (when-not manifest-url
            (throw (ex-info "No :dash_url found in the post data; is this a video post?"
                            {})))
        xml (http-get manifest-url)
        ;; A failed request yields no body; fail here with context instead of
        ;; letting re-seq blow up on nil.
        _ (when-not xml
            (throw (ex-info (str "Could not download DASH manifest: " manifest-url)
                            {:url manifest-url})))
        parts (map second (re-seq #"<BaseURL>(.*?)</BaseURL>" xml))
        best-video (find-best-quality parts false)
        best-audio (find-best-quality parts true)]
    [(str base-url best-video) (str base-url best-audio)]))

(defn filename
  "Returns the part of `url` that follows its last \"/\"; when `url` has no
  \"/\" the whole string is returned."
  [url]
  (let [slash-pos (or (string/last-index-of url "/") -1)]
    (subs url (inc slash-pos))))

(defn tsname
  "Builds an output file name of the form video-<current epoch millis>.mp4."
  []
  (let [now-ms (System/currentTimeMillis)]
    (str "video-" now-ms ".mp4")))

(defn- sh-checked
  "Runs an external command through `sh` and returns its result map.
  Throws ex-info carrying the exit status and stderr when the command exits
  with a non-zero status, so a failed step is not silently ignored."
  [& args]
  (let [{:keys [exit err] :as result} (apply sh args)]
    (when-not (zero? exit)
      (throw (ex-info (str "Command failed with exit status " exit ": "
                           (string/join " " args) "\n" err)
                      {:exit exit :err err})))
    result))

;; Script entry point: takes the post URL as the first command-line argument,
;; fetches the post's JSON, downloads the selected video and audio streams
;; with wget, muxes them with ffmpeg into a timestamped .mp4, and removes the
;; two intermediate downloads.
(let [post-url (first *command-line-args*)]
  (when (string/blank? post-url)
    (binding [*out* *err*]
      (println "Usage: pass the URL of a reddit video post as the first argument"))
    (System/exit 1))
  (let [json-url (-> post-url
                     ;; drop ?query / #fragment so ".json" lands on the path
                     (string/replace #"[?#].*$" "")
                     (str ".json"))
        data (-> json-url http-get (cheshire/decode true))
        base-url (find-base-url data)
        [video-url audio-url] (find-parts base-url data)
        video-file (filename video-url)
        audio-file (filename audio-url)]
    (try
      (sh-checked "wget" video-url)
      (sh-checked "wget" audio-url)
      (sh-checked "ffmpeg" "-i" video-file "-stream_loop" "-1" "-i" audio-file "-shortest" "-map" "0:v:0" "-map" "1:a:0" "-y" (tsname))
      (finally
        ;; always clean up the intermediate downloads, even after a failure
        (sh "rm" audio-file video-file)))))