-
Notifications
You must be signed in to change notification settings - Fork 84
/
Copy pathlatents.py
351 lines (290 loc) · 11.4 KB
/
latents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
from typing import Optional
import comfy_extras.nodes_lt as nodes_lt
import torch
from .nodes_registry import comfy_node
@comfy_node(name="LTXVSelectLatents")
class LTXVSelectLatents:
"""
Selects a range of frames from a video latent.
Features:
- Supports positive and negative indexing
- Preserves batch processing capabilities
- Handles noise masks if present
- Maintains 5D tensor format
"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"samples": ("LATENT",),
"start_index": (
"INT",
{"default": 0, "min": -9999, "max": 9999, "step": 1},
),
"end_index": (
"INT",
{"default": -1, "min": -9999, "max": 9999, "step": 1},
),
}
}
RETURN_TYPES = ("LATENT",)
FUNCTION = "select_latents"
CATEGORY = "latent/video"
def select_latents(self, samples: dict, start_index: int, end_index: int) -> tuple:
"""
Selects a range of frames from the video latent.
Args:
samples (dict): Video latent dictionary
start_index (int): Starting frame index (supports negative indexing)
end_index (int): Ending frame index (supports negative indexing)
Returns:
tuple: Contains modified latent dictionary with selected frames
Raises:
ValueError: If indices are invalid
"""
try:
s = samples.copy()
video_latent = s["samples"]
batch, channels, frames, height, width = video_latent.shape
# Handle negative indices
start_idx = frames + start_index if start_index < 0 else start_index
end_idx = frames + end_index if end_index < 0 else end_index
# Validate and clamp indices
start_idx = max(0, min(start_idx, frames - 1))
end_idx = max(0, min(end_idx, frames - 1))
if start_idx > end_idx:
start_idx = min(start_idx, end_idx)
# Select frames while maintaining 5D format
s["samples"] = video_latent[:, :, start_idx : end_idx + 1, :, :]
# Handle noise mask if present
if "noise_mask" in s:
s["noise_mask"] = s["noise_mask"][:, :, start_idx : end_idx + 1, :, :]
return (s,)
except Exception as e:
print(f"[LTXVSelectLatents] Error: {str(e)}")
raise
@comfy_node(name="LTXVAddLatents")
class LTXVAddLatents:
"""
Concatenates two video latents along the frames dimension.
Features:
- Validates dimension compatibility
- Handles device placement
- Preserves noise masks with proper handling
- Supports batch processing
"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"latents1": ("LATENT",),
"latents2": ("LATENT",),
}
}
RETURN_TYPES = ("LATENT",)
FUNCTION = "add_latents"
CATEGORY = "latent/video"
def add_latents(
self, latents1: torch.Tensor, latents2: torch.Tensor
) -> torch.Tensor:
"""
Concatenates two video latents along the frames dimension.
Args:
latents1 (dict): First video latent dictionary
latents2 (dict): Second video latent dictionary
Returns:
tuple: Contains concatenated latent dictionary
Raises:
ValueError: If latent dimensions don't match
RuntimeError: If tensor operations fail
"""
try:
s = latents1.copy()
video_latent1 = latents1["samples"]
video_latent2 = latents2["samples"]
# Ensure tensors are on the same device
target_device = video_latent1.device
video_latent2 = video_latent2.to(target_device)
# Validate dimensions
self._validate_dimensions(video_latent1, video_latent2)
# Concatenate along frames dimension
s["samples"] = torch.cat([video_latent1, video_latent2], dim=2)
# Handle noise masks
s["noise_mask"] = self._merge_noise_masks(
latents1, latents2, video_latent1.shape[2], video_latent2.shape[2]
)
return (s,)
except Exception as e:
print(f"[LTXVAddLatents] Error: {str(e)}")
raise
def _validate_dimensions(self, latent1: torch.Tensor, latent2: torch.Tensor):
"""Validates that latent dimensions match except for frames."""
b1, c1, f1, h1, w1 = latent1.shape
b2, c2, f2, h2, w2 = latent2.shape
if not (b1 == b2 and c1 == c2 and h1 == h2 and w1 == w2):
raise ValueError(
f"Latent dimensions must match (except frames dimension).\n"
f"Got shapes {latent1.shape} and {latent2.shape}"
)
def _merge_noise_masks(
self, latents1: torch.Tensor, latents2: torch.Tensor, frames1: int, frames2: int
) -> Optional[torch.Tensor]:
"""Merges noise masks from both latents with proper handling."""
if "noise_mask" in latents1 and "noise_mask" in latents2:
return torch.cat([latents1["noise_mask"], latents2["noise_mask"]], dim=2)
elif "noise_mask" in latents1:
zeros = torch.zeros_like(latents1["noise_mask"][:, :, :frames2, :, :])
return torch.cat([latents1["noise_mask"], zeros], dim=2)
elif "noise_mask" in latents2:
zeros = torch.zeros_like(latents2["noise_mask"][:, :, :frames1, :, :])
return torch.cat([zeros, latents2["noise_mask"]], dim=2)
return None
@comfy_node(name="LTXVSetVideoLatentNoiseMasks")
class LTXVSetVideoLatentNoiseMasks:
"""
Applies multiple masks to a video latent.
Features:
- Supports multiple input mask formats (2D, 3D, 4D)
- Automatically handles fewer masks than frames by reusing the last mask
- Resizes masks to match latent dimensions
- Preserves batch processing capabilities
Input Formats:
- 2D mask: Single mask [H, W]
- 3D mask: Multiple masks [M, H, W]
- 4D mask: Multiple masks with channels [M, C, H, W]
"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"samples": ("LATENT",),
"masks": ("MASK",),
}
}
RETURN_TYPES = ("LATENT",)
FUNCTION = "set_mask"
CATEGORY = "latent/video"
def set_mask(self, samples: dict, masks: torch.Tensor) -> tuple:
"""
Applies masks to video latent frames.
Args:
samples (dict): Video latent dictionary containing 'samples' tensor
masks (torch.Tensor): Mask tensor in various possible formats
- 2D: [H, W] single mask
- 3D: [M, H, W] multiple masks
- 4D: [M, C, H, W] multiple masks with channels
Returns:
tuple: Contains modified latent dictionary with applied masks
Raises:
ValueError: If mask dimensions are unsupported
RuntimeError: If tensor operations fail
"""
try:
s = samples.copy()
video_latent = s["samples"]
batch_size, channels, num_frames, height, width = video_latent.shape
# Initialize noise_mask if not present
if "noise_mask" not in s:
s["noise_mask"] = torch.zeros(
(batch_size, 1, num_frames, height, width),
dtype=video_latent.dtype,
device=video_latent.device,
)
# Process masks
masks_reshaped = self._reshape_masks(masks)
M = masks_reshaped.shape[0]
resized_masks = self._resize_masks(masks_reshaped, height, width)
# Apply masks efficiently
self._apply_masks(s["noise_mask"], resized_masks, num_frames, M)
return (s,)
except Exception as e:
print(f"[LTXVSetVideoLatentNoiseMasks] Error: {str(e)}")
raise
def _reshape_masks(self, masks: torch.Tensor) -> torch.Tensor:
"""Reshapes input masks to consistent 4D format."""
original_shape = tuple(masks.shape)
ndims = masks.ndim
if ndims == 2:
return masks.unsqueeze(0).unsqueeze(0)
elif ndims == 3:
return masks.reshape(masks.shape[0], 1, masks.shape[1], masks.shape[2])
elif ndims == 4:
return masks.reshape(masks.shape[0], 1, masks.shape[2], masks.shape[3])
else:
raise ValueError(
f"Unsupported 'masks' dimension: {original_shape}. "
"Must be 2D (H,W), 3D (M,H,W), or 4D (M,C,H,W)."
)
def _resize_masks(
self, masks: torch.Tensor, height: int, width: int
) -> torch.Tensor:
"""Resizes all masks to match latent dimensions."""
return torch.nn.functional.interpolate(
masks, size=(height, width), mode="bilinear", align_corners=False
)
def _apply_masks(
self,
noise_mask: torch.Tensor,
resized_masks: torch.Tensor,
num_frames: int,
M: int,
) -> None:
"""Applies resized masks to all frames."""
for f in range(num_frames):
mask_idx = min(f, M - 1) # Reuse last mask if we run out
noise_mask[:, :, f] = resized_masks[mask_idx]
@comfy_node(name="LTXVAddLatentGuide")
class LTXVAddLatentGuide(nodes_lt.LTXVAddGuide):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"vae": ("VAE",),
"positive": ("CONDITIONING",),
"negative": ("CONDITIONING",),
"latent": ("LATENT",),
"guiding_latent": ("LATENT",),
"latent_idx": (
"INT",
{
"default": 0,
"min": -9999,
"max": 9999,
"step": 1,
"tooltip": "Latent index to start the conditioning at. Can be negative to"
"indicate that the conditioning is on the frames before the latent.",
},
),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0}),
}
}
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
CATEGORY = "ltxtricks"
FUNCTION = "generate"
def generate(
self, vae, positive, negative, latent, guiding_latent, latent_idx, strength
):
noise_mask = nodes_lt.get_noise_mask(latent)
latent = latent["samples"]
guiding_latent = guiding_latent["samples"]
scale_factors = vae.downscale_index_formula
if latent_idx <= 0:
frame_idx = latent_idx * scale_factors[0]
else:
frame_idx = 1 + (latent_idx - 1) * scale_factors[0]
positive, negative, latent, noise_mask = self.append_keyframe(
positive=positive,
negative=negative,
frame_idx=frame_idx,
latent_image=latent,
noise_mask=noise_mask,
guiding_latent=guiding_latent,
strength=strength,
scale_factors=scale_factors,
)
return (
positive,
negative,
{"samples": latent, "noise_mask": noise_mask},
)