Fork of https://github.com/matrix-org/dendrite for cross-compiling to ARM.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

317 lines
12 KiB

  1. // Copyright 2017 Vector Creations Ltd
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package routing
  15. import (
  16. "context"
  17. "crypto/rand"
  18. "encoding/hex"
  19. "fmt"
  20. "io"
  21. "net/http"
  22. "net/url"
  23. "path"
  24. "strings"
  25. "github.com/matrix-org/dendrite/clientapi/jsonerror"
  26. "github.com/matrix-org/dendrite/mediaapi/fileutils"
  27. "github.com/matrix-org/dendrite/mediaapi/storage"
  28. "github.com/matrix-org/dendrite/mediaapi/thumbnailer"
  29. "github.com/matrix-org/dendrite/mediaapi/types"
  30. "github.com/matrix-org/dendrite/setup/config"
  31. userapi "github.com/matrix-org/dendrite/userapi/api"
  32. "github.com/matrix-org/gomatrixserverlib"
  33. "github.com/matrix-org/util"
  34. log "github.com/sirupsen/logrus"
  35. )
  36. // uploadRequest metadata included in or derivable from an upload request
  37. // https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
  38. // NOTE: The members come from HTTP request metadata such as headers, query parameters or can be derived from such
  39. type uploadRequest struct {
  40. MediaMetadata *types.MediaMetadata
  41. Logger *log.Entry
  42. }
  43. // uploadResponse defines the format of the JSON response
  44. // https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
  45. type uploadResponse struct {
  46. ContentURI string `json:"content_uri"`
  47. }
  48. // Upload implements POST /upload
  49. // This endpoint involves uploading potentially significant amounts of data to the homeserver.
  50. // This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
  51. // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
  52. // TODO: We should time out requests if they have not received any data within a configured timeout period.
  53. func Upload(req *http.Request, cfg *config.MediaAPI, dev *userapi.Device, db storage.Database, activeThumbnailGeneration *types.ActiveThumbnailGeneration) util.JSONResponse {
  54. r, resErr := parseAndValidateRequest(req, cfg, dev)
  55. if resErr != nil {
  56. return *resErr
  57. }
  58. if resErr = r.doUpload(req.Context(), req.Body, cfg, db, activeThumbnailGeneration); resErr != nil {
  59. return *resErr
  60. }
  61. return util.JSONResponse{
  62. Code: http.StatusOK,
  63. JSON: uploadResponse{
  64. ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.Matrix.ServerName, r.MediaMetadata.MediaID),
  65. },
  66. }
  67. }
  68. // parseAndValidateRequest parses the incoming upload request to validate and extract
  69. // all the metadata about the media being uploaded.
  70. // Returns either an uploadRequest or an error formatted as a util.JSONResponse
  71. func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI, dev *userapi.Device) (*uploadRequest, *util.JSONResponse) {
  72. r := &uploadRequest{
  73. MediaMetadata: &types.MediaMetadata{
  74. Origin: cfg.Matrix.ServerName,
  75. FileSizeBytes: types.FileSizeBytes(req.ContentLength),
  76. ContentType: types.ContentType(req.Header.Get("Content-Type")),
  77. UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))),
  78. UserID: types.MatrixUserID(dev.UserID),
  79. },
  80. Logger: util.GetLogger(req.Context()).WithField("Origin", cfg.Matrix.ServerName),
  81. }
  82. if resErr := r.Validate(*cfg.MaxFileSizeBytes); resErr != nil {
  83. return nil, resErr
  84. }
  85. return r, nil
  86. }
  87. func (r *uploadRequest) generateMediaID(ctx context.Context, db storage.Database) (types.MediaID, error) {
  88. for {
  89. // First try generating a meda ID. We'll do this by
  90. // generating some random bytes and then hex-encoding.
  91. mediaIDBytes := make([]byte, 32)
  92. _, err := rand.Read(mediaIDBytes)
  93. if err != nil {
  94. return "", fmt.Errorf("rand.Read: %w", err)
  95. }
  96. mediaID := types.MediaID(hex.EncodeToString(mediaIDBytes))
  97. // Then we will check if this media ID already exists in
  98. // our database. If it does then we had best generate a
  99. // new one.
  100. existingMetadata, err := db.GetMediaMetadata(ctx, mediaID, r.MediaMetadata.Origin)
  101. if err != nil {
  102. return "", fmt.Errorf("db.GetMediaMetadata: %w", err)
  103. }
  104. if existingMetadata != nil {
  105. // The media ID was already used - repeat the process
  106. // and generate a new one instead.
  107. continue
  108. }
  109. // The media ID was not already used - let's return that.
  110. return mediaID, nil
  111. }
  112. }
  113. func (r *uploadRequest) doUpload(
  114. ctx context.Context,
  115. reqReader io.Reader,
  116. cfg *config.MediaAPI,
  117. db storage.Database,
  118. activeThumbnailGeneration *types.ActiveThumbnailGeneration,
  119. ) *util.JSONResponse {
  120. r.Logger.WithFields(log.Fields{
  121. "UploadName": r.MediaMetadata.UploadName,
  122. "FileSizeBytes": r.MediaMetadata.FileSizeBytes,
  123. "ContentType": r.MediaMetadata.ContentType,
  124. }).Info("Uploading file")
  125. // The file data is hashed and the hash is used as the MediaID. The hash is useful as a
  126. // method of deduplicating files to save storage, as well as a way to conduct
  127. // integrity checks on the file data in the repository.
  128. // Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
  129. //
  130. // TODO: This has a bad API shape where you either need to call:
  131. // fileutils.RemoveDir(tmpDir, r.Logger)
  132. // or call:
  133. // r.storeFileAndMetadata(ctx, tmpDir, ...)
  134. // before you return from doUpload else we will leak a temp file. We could make this nicer with a `WithTransaction` style of
  135. // nested function to guarantee either storage or cleanup.
  136. hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, reqReader, cfg.AbsBasePath)
  137. if err != nil {
  138. r.Logger.WithError(err).WithFields(log.Fields{
  139. "MaxFileSizeBytes": *cfg.MaxFileSizeBytes,
  140. }).Warn("Error while transferring file")
  141. return &util.JSONResponse{
  142. Code: http.StatusBadRequest,
  143. JSON: jsonerror.Unknown("Failed to upload"),
  144. }
  145. }
  146. // Check if temp file size exceeds max file size configuration
  147. if bytesWritten > types.FileSizeBytes(*cfg.MaxFileSizeBytes) {
  148. fileutils.RemoveDir(tmpDir, r.Logger) // delete temp file
  149. return requestEntityTooLargeJSONResponse(*cfg.MaxFileSizeBytes)
  150. }
  151. // Look up the media by the file hash. If we already have the file but under a
  152. // different media ID then we won't upload the file again - instead we'll just
  153. // add a new metadata entry that refers to the same file.
  154. existingMetadata, err := db.GetMediaMetadataByHash(
  155. ctx, hash, r.MediaMetadata.Origin,
  156. )
  157. if err != nil {
  158. fileutils.RemoveDir(tmpDir, r.Logger)
  159. r.Logger.WithError(err).Error("Error querying the database by hash.")
  160. resErr := jsonerror.InternalServerError()
  161. return &resErr
  162. }
  163. if existingMetadata != nil {
  164. // The file already exists, delete the uploaded temporary file.
  165. defer fileutils.RemoveDir(tmpDir, r.Logger)
  166. // The file already exists. Make a new media ID up for it.
  167. mediaID, merr := r.generateMediaID(ctx, db)
  168. if merr != nil {
  169. r.Logger.WithError(merr).Error("Failed to generate media ID for existing file")
  170. resErr := jsonerror.InternalServerError()
  171. return &resErr
  172. }
  173. // Then amend the upload metadata.
  174. r.MediaMetadata = &types.MediaMetadata{
  175. MediaID: mediaID,
  176. Origin: r.MediaMetadata.Origin,
  177. ContentType: r.MediaMetadata.ContentType,
  178. FileSizeBytes: r.MediaMetadata.FileSizeBytes,
  179. CreationTimestamp: r.MediaMetadata.CreationTimestamp,
  180. UploadName: r.MediaMetadata.UploadName,
  181. Base64Hash: hash,
  182. UserID: r.MediaMetadata.UserID,
  183. }
  184. } else {
  185. // The file doesn't exist. Update the request metadata.
  186. r.MediaMetadata.FileSizeBytes = bytesWritten
  187. r.MediaMetadata.Base64Hash = hash
  188. r.MediaMetadata.MediaID, err = r.generateMediaID(ctx, db)
  189. if err != nil {
  190. fileutils.RemoveDir(tmpDir, r.Logger)
  191. r.Logger.WithError(err).Error("Failed to generate media ID for new upload")
  192. resErr := jsonerror.InternalServerError()
  193. return &resErr
  194. }
  195. }
  196. r.Logger = r.Logger.WithField("media_id", r.MediaMetadata.MediaID)
  197. r.Logger.WithFields(log.Fields{
  198. "Base64Hash": r.MediaMetadata.Base64Hash,
  199. "UploadName": r.MediaMetadata.UploadName,
  200. "FileSizeBytes": r.MediaMetadata.FileSizeBytes,
  201. "ContentType": r.MediaMetadata.ContentType,
  202. }).Info("File uploaded")
  203. return r.storeFileAndMetadata(
  204. ctx, tmpDir, cfg.AbsBasePath, db, cfg.ThumbnailSizes,
  205. activeThumbnailGeneration, cfg.MaxThumbnailGenerators,
  206. )
  207. }
  208. func requestEntityTooLargeJSONResponse(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse {
  209. return &util.JSONResponse{
  210. Code: http.StatusRequestEntityTooLarge,
  211. JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)),
  212. }
  213. }
  214. // Validate validates the uploadRequest fields
  215. func (r *uploadRequest) Validate(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse {
  216. if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > types.FileSizeBytes(maxFileSizeBytes) {
  217. return requestEntityTooLargeJSONResponse(maxFileSizeBytes)
  218. }
  219. if strings.HasPrefix(string(r.MediaMetadata.UploadName), "~") {
  220. return &util.JSONResponse{
  221. Code: http.StatusBadRequest,
  222. JSON: jsonerror.Unknown("File name must not begin with '~'."),
  223. }
  224. }
  225. // TODO: Validate filename - what are the valid characters?
  226. if r.MediaMetadata.UserID != "" {
  227. // TODO: We should put user ID parsing code into gomatrixserverlib and use that instead
  228. // (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 )
  229. // It should be a struct (with pointers into a single string to avoid copying) and
  230. // we should update all refs to use UserID types rather than strings.
  231. // https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92
  232. if _, _, err := gomatrixserverlib.SplitID('@', string(r.MediaMetadata.UserID)); err != nil {
  233. return &util.JSONResponse{
  234. Code: http.StatusBadRequest,
  235. JSON: jsonerror.BadJSON("user id must be in the form @localpart:domain"),
  236. }
  237. }
  238. }
  239. return nil
  240. }
  241. // storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database
  242. // See getPathFromMediaMetadata in fileutils for details of the final path.
  243. // The order of operations is important as it avoids metadata entering the database before the file
  244. // is ready, and if we fail to move the file, it never gets added to the database.
  245. // Returns a util.JSONResponse error and cleans up directories in case of error.
  246. func (r *uploadRequest) storeFileAndMetadata(
  247. ctx context.Context,
  248. tmpDir types.Path,
  249. absBasePath config.Path,
  250. db storage.Database,
  251. thumbnailSizes []config.ThumbnailSize,
  252. activeThumbnailGeneration *types.ActiveThumbnailGeneration,
  253. maxThumbnailGenerators int,
  254. ) *util.JSONResponse {
  255. finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
  256. if err != nil {
  257. r.Logger.WithError(err).Error("Failed to move file.")
  258. return &util.JSONResponse{
  259. Code: http.StatusBadRequest,
  260. JSON: jsonerror.Unknown("Failed to upload"),
  261. }
  262. }
  263. if duplicate {
  264. r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
  265. }
  266. if err = db.StoreMediaMetadata(ctx, r.MediaMetadata); err != nil {
  267. r.Logger.WithError(err).Warn("Failed to store metadata")
  268. // If the file is a duplicate (has the same hash as an existing file) then
  269. // there is valid metadata in the database for that file. As such we only
  270. // remove the file if it is not a duplicate.
  271. if !duplicate {
  272. fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
  273. }
  274. return &util.JSONResponse{
  275. Code: http.StatusBadRequest,
  276. JSON: jsonerror.Unknown("Failed to upload"),
  277. }
  278. }
  279. go func() {
  280. busy, err := thumbnailer.GenerateThumbnails(
  281. context.Background(), finalPath, thumbnailSizes, r.MediaMetadata,
  282. activeThumbnailGeneration, maxThumbnailGenerators, db, r.Logger,
  283. )
  284. if err != nil {
  285. r.Logger.WithError(err).Warn("Error generating thumbnails")
  286. }
  287. if busy {
  288. r.Logger.Warn("Maximum number of active thumbnail generators reached. Skipping pre-generation.")
  289. }
  290. }()
  291. return nil
  292. }