Major improvements: security handling, file handling, and fixes

This commit is contained in:
unknown
2026-05-23 00:13:56 +02:00
parent 2129081599
commit a7b44af91a
25 changed files with 925 additions and 116 deletions

View File

@@ -15,3 +15,4 @@ tempfile = "3"
tracing = "0.1"
chrono = "0.4"
sodiumoxide = "0.2"
blake3 = "1.5"

View File

@@ -1,12 +1,12 @@
use cgcx_config::Config;
use cgcx_core::{ContentFile, ContentId, ContentStatus, Content, Result, CgcxError};
use cgcx_crypto::{ContentKey, wrap_content_key};
use cgcx_db::{Database, ContentRepo, ContentFileRepo};
use cgcx_db::{Database, ContentRepo, ContentFileRepo, HashBlacklistRepo};
use cgcx_storage::Storage;
use cgcx_content_typing::{detect_mime_type, compute_render_flags};
use sodiumoxide::crypto::secretstream::xchacha20poly1305::Tag::{Message, Final};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};
use std::collections::HashSet;
pub use cgcx_crypto::MasterKey;
@@ -54,6 +54,7 @@ impl FilePipeline {
let temp_path = named_temp.path().to_path_buf();
let mut total_size: u64 = 0;
let mut plaintext_hasher = blake3::Hasher::new();
{
let mut temp_file = tokio::fs::File::create(&temp_path).await
.map_err(|e| CgcxError::Storage(format!("create temp file: {}", e)))?;
@@ -73,6 +74,7 @@ impl FilePipeline {
)));
}
total_size = new_total;
plaintext_hasher.update(&buf[..pending]);
let ciphertext = encrypt_stream.push(&buf[..pending], Message);
temp_file.write_all(&(ciphertext.len() as u32).to_le_bytes()).await
.map_err(|e| CgcxError::Storage(format!("write length prefix: {}", e)))?;
@@ -94,6 +96,7 @@ impl FilePipeline {
)));
}
total_size = new_total;
plaintext_hasher.update(&buf[..pending]);
let ciphertext = encrypt_stream.push(&buf[..pending], Final);
temp_file.write_all(&(ciphertext.len() as u32).to_le_bytes()).await
.map_err(|e| CgcxError::Storage(format!("write length prefix: {}", e)))?;
@@ -117,9 +120,43 @@ impl FilePipeline {
.map_err(|e| CgcxError::Storage(format!("flush temp file: {}", e)))?;
}
let plaintext_hash = plaintext_hasher.finalize();
let encrypted_hash = encrypt_stream.finalize();
let ciphertext_size_bytes = self.storage.file_size(&temp_path).await?;
let file_repo = ContentFileRepo::new(self.db.conn());
let hash_bytes = plaintext_hash.as_bytes();
// N — Hash blacklist enforcement
let blacklist_repo = HashBlacklistRepo::new(self.db.conn());
if blacklist_repo.contains(hash_bytes).await? {
drop(named_temp);
return Err(CgcxError::BlockedHash);
}
// M — Deduplication
if let Some(existing) = file_repo.find_active_by_plaintext_hash(hash_bytes).await? {
drop(named_temp);
let content_file = ContentFile {
content_id: content_id.clone(),
file_index,
original_name: original_name.to_string(),
stored_path: existing.stored_path.clone(),
mime_type: existing.mime_type.clone(),
size_bytes: total_size,
ciphertext_size_bytes: existing.ciphertext_size_bytes,
encrypted_key_wrapped: existing.encrypted_key_wrapped.clone(),
encrypted_hash: existing.encrypted_hash.clone(),
render_flags,
created_at: chrono::Utc::now(),
plaintext_hash: hash_bytes.to_vec(),
ref_count: 0,
};
file_repo.insert(&content_file).await?;
file_repo.increment_ref_count(&existing.content_id, existing.file_index).await?;
return Ok(content_file);
}
let final_path = self.storage.file_path(content_id, file_index, &mime_type)?;
if let Some(parent) = final_path.parent() {
tokio::fs::create_dir_all(parent).await
@@ -143,9 +180,10 @@ impl FilePipeline {
encrypted_hash: encrypted_hash.to_vec(),
render_flags,
created_at: chrono::Utc::now(),
plaintext_hash: hash_bytes.to_vec(),
ref_count: 1,
};
let file_repo = ContentFileRepo::new(self.db.conn());
file_repo.insert(&content_file).await?;
Ok(content_file)
@@ -158,6 +196,7 @@ impl FilePipeline {
max_views: Option<u64>,
allow_download: bool,
password_hash: Option<String>,
show_author: bool,
) -> Result<()> {
let content = Content {
id: content_id,
@@ -167,6 +206,7 @@ impl FilePipeline {
max_views,
allow_download,
password_hash,
show_author,
created_at: chrono::Utc::now(),
deleted_at: None,
};
@@ -185,8 +225,20 @@ impl FilePipeline {
if !keep_disk {
for file in &files {
if let Err(e) = tokio::fs::remove_file(&file.stored_path).await {
tracing::warn!("failed to remove file {:?}: {}", file.stored_path, e);
if file.ref_count > 0 {
if let Err(e) = file_repo.decrement_ref_count(&file.content_id, file.file_index).await {
tracing::warn!("failed to decrement ref_count for {:?}: {}", file.stored_path, e);
}
} else {
if let Err(e) = file_repo.decrement_ref_count_for_path(&file.stored_path).await {
tracing::warn!("failed to decrement owner ref_count for {:?}: {}", file.stored_path, e);
}
}
let remaining = file_repo.count_by_path_excluding_content(&file.stored_path, content_id).await.unwrap_or(1);
if remaining == 0 {
if let Err(e) = tokio::fs::remove_file(&file.stored_path).await {
tracing::warn!("failed to remove file {:?}: {}", file.stored_path, e);
}
}
}
if let Some(first) = files.first() {
@@ -243,24 +295,6 @@ impl FilePipeline {
continue;
}
let content_id_str = dir_path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("");
let db_paths: HashSet<std::path::PathBuf> = if ContentId::is_valid(content_id_str) {
let content_id = ContentId::new_unchecked(content_id_str.to_string());
match file_repo.list_by_content(&content_id).await {
Ok(files) => files.into_iter().map(|f| f.stored_path).collect(),
Err(e) => {
tracing::warn!("failed to list files for {}: {}", content_id, e);
continue;
}
}
} else {
// Invalid content directory: nothing in it can be referenced.
HashSet::new()
};
let mut sub_entries = tokio::fs::read_dir(&dir_path).await
.map_err(|e| CgcxError::Storage(format!("read content dir: {}", e)))?;
while let Some(sub_entry) = sub_entries.next_entry().await
@@ -268,7 +302,8 @@ impl FilePipeline {
{
let path = sub_entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("enc") {
if !db_paths.contains(&path) {
let count = file_repo.count_by_path(&path).await.unwrap_or(1);
if count == 0 {
if let Err(e) = tokio::fs::remove_file(&path).await {
tracing::warn!("failed to remove orphan enc file {:?}: {}", path, e);
} else {