Dedupe uploads
author Sir_Boops <admin@boops.me>
Sun, 15 Apr 2018 23:37:51 +0000 (17:37 -0600)
committer Sir_Boops <admin@boops.me>
Wed, 6 Jun 2018 19:25:44 +0000 (13:25 -0600)
lib/mix/tasks/sample_config.eex
lib/pleroma/upload.ex
lib/pleroma/web/activity_pub/activity_pub.ex
test/upload_test.exs

index e37c864c0dfd6719838962e4b1c80f0574450725..d57591d53c6b02b0a30923db674440184c99eb63 100644 (file)
@@ -8,7 +8,8 @@ config :pleroma, :instance,
   name: "<%= name %>",
   email: "<%= email %>",
   limit: 5000,
-  registrations_open: true
+  registrations_open: true,
+  dedupe_media: true
 
 config :pleroma, :media_proxy,
   enabled: false,
index e5df9400987adfc285302975d32cdc78e1829339..ab4bd16f02dc19384f09c97a727de404d143080b 100644 (file)
@@ -2,20 +2,21 @@ defmodule Pleroma.Upload do
   alias Ecto.UUID
   alias Pleroma.Web
 
-  def store(%Plug.Upload{} = file) do
-    uuid = UUID.generate()
-    upload_folder = Path.join(upload_path(), uuid)
+  def store(%Plug.Upload{} = file, should_dedupe) do
+    content_type = get_content_type(file.path)
+    uuid = get_uuid(file, should_dedupe)
+    name = get_name(file, uuid, content_type, should_dedupe)
+    upload_folder = get_upload_path(uuid, should_dedupe)
+    url_path = get_url(name, uuid, should_dedupe)
+
     File.mkdir_p!(upload_folder)
-    result_file = Path.join(upload_folder, file.filename)
-    File.cp!(file.path, result_file)
+    result_file = Path.join(upload_folder, name)
 
-    # fix content type on some image uploads
-    content_type =
-      if file.content_type in [nil, "application/octet-stream"] do
-        get_content_type(file.path)
-      else
-        file.content_type
-      end
+    if File.exists?(result_file) do
+      File.rm!(file.path)
+    else
+      File.cp!(file.path, result_file)
+    end
 
     %{
       "type" => "Image",
@@ -23,26 +24,48 @@ defmodule Pleroma.Upload do
         %{
           "type" => "Link",
           "mediaType" => content_type,
-          "href" => url_for(Path.join(uuid, :cow_uri.urlencode(file.filename)))
+          "href" => url_path
         }
       ],
-      "name" => file.filename,
-      "uuid" => uuid
+      "name" => name
     }
   end
 
-  def store(%{"img" => "data:image/" <> image_data}) do
+  def store(%{"img" => "data:image/" <> image_data}, should_dedupe) do
     parsed = Regex.named_captures(~r/(?<filetype>jpeg|png|gif);base64,(?<data>.*)/, image_data)
-    data = Base.decode64!(parsed["data"])
+    data = Base.decode64!(parsed["data"], ignore: :whitespace)
     uuid = UUID.generate()
-    upload_folder = Path.join(upload_path(), uuid)
-    File.mkdir_p!(upload_folder)
-    filename = Base.encode16(:crypto.hash(:sha256, data)) <> ".#{parsed["filetype"]}"
-    result_file = Path.join(upload_folder, filename)
+    uuidpath = Path.join(upload_path(), uuid)
+    uuid = UUID.generate()
+
+    File.mkdir_p!(upload_path())
 
-    File.write!(result_file, data)
+    File.write!(uuidpath, data)
 
-    content_type = "image/#{parsed["filetype"]}"
+    content_type = get_content_type(uuidpath)
+
+    name =
+      create_name(
+        String.downcase(Base.encode16(:crypto.hash(:sha256, data))),
+        parsed["filetype"],
+        content_type
+      )
+
+    upload_folder = get_upload_path(uuid, should_dedupe)
+    url_path = get_url(name, uuid, should_dedupe)
+
+    File.mkdir_p!(upload_folder)
+    result_file = Path.join(upload_folder, name)
+
+    if should_dedupe do
+      if !File.exists?(result_file) do
+        File.rename(uuidpath, result_file)
+      else
+        File.rm!(uuidpath)
+      end
+    else
+      File.rename(uuidpath, result_file)
+    end
 
     %{
       "type" => "Image",
@@ -50,11 +73,10 @@ defmodule Pleroma.Upload do
         %{
           "type" => "Link",
           "mediaType" => content_type,
-          "href" => url_for(Path.join(uuid, :cow_uri.urlencode(filename)))
+          "href" => url_path
         }
       ],
-      "name" => filename,
-      "uuid" => uuid
+      "name" => name
     }
   end
 
@@ -63,6 +85,46 @@ defmodule Pleroma.Upload do
     Keyword.fetch!(settings, :uploads)
   end
 
+  defp create_name(uuid, ext, type) do
+    if type == "application/octet-stream" do
+      String.downcase(Enum.join([uuid, ext], "."))
+    else
+      String.downcase(Enum.join([uuid, List.last(String.split(type, "/"))], "."))
+    end
+  end
+
+  defp get_uuid(file, should_dedupe) do
+    if should_dedupe do
+      Base.encode16(:crypto.hash(:sha256, File.read!(file.path)))
+    else
+      UUID.generate()
+    end
+  end
+
+  defp get_name(file, uuid, type, should_dedupe) do
+    if should_dedupe do
+      create_name(uuid, List.last(String.split(file.filename, ".")), type)
+    else
+      file.filename
+    end
+  end
+
+  defp get_upload_path(uuid, should_dedupe) do
+    if should_dedupe do
+      upload_path()
+    else
+      Path.join(upload_path(), uuid)
+    end
+  end
+
+  defp get_url(name, uuid, should_dedupe) do
+    if should_dedupe do
+      url_for(:cow_uri.urlencode(name))
+    else
+      url_for(Path.join(uuid, :cow_uri.urlencode(name)))
+    end
+  end
+
   defp url_for(file) do
     "#{Web.base_url()}/media/#{file}"
   end
index 4e0be5ba2802143daa32350b2d971ccc6111e650..3a03f5fe4718f17c4bed198d04479b49e3f6488d 100644 (file)
@@ -492,7 +492,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
   end
 
   def upload(file) do
-    data = Upload.store(file)
+    data = Upload.store(file, Application.get_env(:pleroma, :instance)[:dedupe_media])
     Repo.insert(%Object{data: data})
   end
 
index d68b3e7badea805f50468e527eee9fa5d4cfff20..645f10293a2d4a800597c4d9889cceb731e48a09 100644 (file)
@@ -3,40 +3,45 @@ defmodule Pleroma.UploadTest do
   use Pleroma.DataCase
 
   describe "Storing a file" do
-    test "copies the file to the configured folder" do
+    test "copies the file to the configured folder with deduping" do
+      File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
+
       file = %Plug.Upload{
         content_type: "image/jpg",
-        path: Path.absname("test/fixtures/image.jpg"),
+        path: Path.absname("test/fixtures/image_tmp.jpg"),
         filename: "an [image.jpg"
       }
 
-      data = Upload.store(file)
-      assert data["name"] == "an [image.jpg"
+      data = Upload.store(file, true)
 
-      assert List.first(data["url"])["href"] ==
-               "http://localhost:4001/media/#{data["uuid"]}/an%20%5Bimage.jpg"
+      assert data["name"] ==
+               "e7a6d0cf595bff76f14c9a98b6c199539559e8b844e02e51e5efcfd1f614a2df.jpeg"
     end
 
-    test "fixes an incorrect content type" do
+    test "copies the file to the configured folder without deduping" do
+      File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
+
       file = %Plug.Upload{
-        content_type: "application/octet-stream",
-        path: Path.absname("test/fixtures/image.jpg"),
+        content_type: "image/jpg",
+        path: Path.absname("test/fixtures/image_tmp.jpg"),
         filename: "an [image.jpg"
       }
 
-      data = Upload.store(file)
-      assert hd(data["url"])["mediaType"] == "image/jpeg"
+      data = Upload.store(file, false)
+      assert data["name"] == "an [image.jpg"
     end
 
-    test "does not modify a valid content type" do
+    test "fixes incorrect content type" do
+      File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
+
       file = %Plug.Upload{
-        content_type: "image/png",
-        path: Path.absname("test/fixtures/image.jpg"),
+        content_type: "application/octet-stream",
+        path: Path.absname("test/fixtures/image_tmp.jpg"),
         filename: "an [image.jpg"
       }
 
-      data = Upload.store(file)
-      assert hd(data["url"])["mediaType"] == "image/png"
+      data = Upload.store(file, true)
+      assert hd(data["url"])["mediaType"] == "image/jpeg"
     end
   end
 end