Skip to content

Commit 1cd4ad6

Browse files
vanceingallsclaude
andauthored
feat(core): sourceMutation data-hf-id targeting (R1, T7) (heygen-com#1272)
* feat(core): clip-model hf- ids minted at parse, emitted as data-hf-id (R1) * docs(core): document legacy-id round-trip in clip-model readback (R1 review) Addresses Rames' review on heygen-com#1270: clarifies that a pre-R1 clip authored with id="my-title" round-trips as data-hf-id="my-title" (non-hf-shaped but stable, exact-match) by design — targeting uses exact [data-hf-id="…"] match and does not require the hf- shape; legacy values re-mint only at the R7 write-back. Not a bug. Comment-only. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * docs(core): fix misleading legacy-id migration comment in htmlParser.ts The original comment said legacy data-hf-id values "are re-minted only once the R7 write-back persists freshly-minted ids to source" — which is incorrect. ensureHfIds skips elements that already carry data-hf-id, so legacy values (e.g. data-hf-id="my-title") persist indefinitely and are NOT automatically re-minted. Exact-match targeting still works correctly. Update comment to reflect actual behaviour. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * feat(studio): sourcePatcher data-hf-id targeting (R1, T3) * fix(studio): warn on duplicate match in execDataAttrPattern (R1, T3 review) Addresses Rames' review on heygen-com#1271: execDataAttrPattern returned the first regex match without checking for a second. A duplicate id/data-hf-id in source (id drift) would silently patch one element and leave the other stale. Now warns when more than one element matches. By the mint contract it should never fire. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * test(studio): pin hfId-is-authoritative-over-selector contract (R1, T3 review) Adds test: "hfId match is authoritative — selector is not used as a narrowing filter". When hfId matches element A and selector points at element B, findTagByTarget returns A without consulting selector as a narrowing filter. Pins the intended behaviour so a future refactor cannot silently start narrowing by selector. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * feat(core): sourceMutation data-hf-id targeting (R1, T7) * test(core): update htmlParser baselines for R1 hf- id format Elements now get data-hf-id minted by ensureHfIds; parser reads data-hf-id as model id, so HTML id attrs are no longer the model id. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * test(core): data-hf-id survives id/selector patch (R1, T7) Locks the preservation guarantee the write-back design depends on: a Studio edit targeting by id or selector (it never sends hfId) must not strip an existing data-hf-id, or the stable handle is destroyed by the next edit. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(core): escape hfId in selector + warn on duplicate match (R1, T7 review) Addresses review on heygen-com#1272 (Miguel P3 + Rames): findTargetElement interpolated target.hfId raw into a [data-hf-id="..."] selector. Escape it (CSS attr-value injection guard) and warn when a hfId matches more than one element instead of silently patching an arbitrary one. Adds an injection-guard test. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 42c247d commit 1cd4ad6

3 files changed

Lines changed: 133 additions & 18 deletions

File tree

packages/core/src/parsers/htmlParser.test.ts

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ describe("parseHtml", () => {
2626
const result = parseHtml(html);
2727

2828
expect(result.elements).toHaveLength(2);
29-
expect(result.elements[0].id).toBe("text1");
29+
expect(result.elements[0].id).toMatch(/^hf-[a-z0-9]{4}$/);
3030
expect(result.elements[0].startTime).toBe(0);
3131
expect(result.elements[0].duration).toBe(5);
3232
expect(result.elements[0].name).toBe("Title");
3333
expect(result.elements[0].type).toBe("text");
3434

35-
expect(result.elements[1].id).toBe("text2");
35+
expect(result.elements[1].id).toMatch(/^hf-[a-z0-9]{4}$/);
3636
expect(result.elements[1].startTime).toBe(2);
3737
expect(result.elements[1].duration).toBe(5);
3838
});
@@ -53,7 +53,7 @@ describe("parseHtml", () => {
5353

5454
expect(result.elements).toHaveLength(1);
5555
expect(result.elements[0].type).toBe("composition");
56-
expect(result.elements[0].id).toBe("comp1");
56+
expect(result.elements[0].id).toMatch(/^hf-[a-z0-9]{4}$/);
5757
if (result.elements[0].type === "composition") {
5858
expect(result.elements[0].compositionId).toBe("abc123");
5959
expect(result.elements[0].src).toBe("/compositions/abc123");
@@ -76,20 +76,20 @@ describe("parseHtml", () => {
7676

7777
expect(result.elements).toHaveLength(3);
7878

79-
const video = result.elements.find((e) => e.id === "vid1");
79+
const video = result.elements.find((e) => e.type === "video");
8080
expect(video).toBeDefined();
81-
expect(video?.type).toBe("video");
81+
expect(video?.id).toMatch(/^hf-[a-z0-9]{4}$/);
8282
if (video?.type === "video") {
8383
expect(video.src).toBe("video.mp4");
8484
}
8585

86-
const audio = result.elements.find((e) => e.id === "aud1");
86+
const audio = result.elements.find((e) => e.type === "audio");
8787
expect(audio).toBeDefined();
88-
expect(audio?.type).toBe("audio");
88+
expect(audio?.id).toMatch(/^hf-[a-z0-9]{4}$/);
8989

90-
const img = result.elements.find((e) => e.id === "img1");
90+
const img = result.elements.find((e) => e.type === "image");
9191
expect(img).toBeDefined();
92-
expect(img?.type).toBe("image");
92+
expect(img?.id).toMatch(/^hf-[a-z0-9]{4}$/);
9393
});
9494

9595
it("handles missing attributes gracefully", () => {
@@ -398,9 +398,12 @@ describe("parseHtml", () => {
398398
`;
399399
const result = parseHtml(html);
400400

401-
expect(result.keyframes["text1"]).toBeDefined();
402-
expect(result.keyframes["text1"]).toHaveLength(2);
403-
expect(result.keyframes["text1"][0].id).toBe("kf1");
401+
const elId = result.elements[0]?.id ?? "";
402+
expect(elId).toMatch(/^hf-[a-z0-9]{4}$/);
403+
const kfs = result.keyframes[elId];
404+
expect(kfs).toBeDefined();
405+
expect(kfs).toHaveLength(2);
406+
expect(kfs[0].id).toBe("kf1");
404407
});
405408

406409
it("parses stage zoom keyframes", () => {

packages/core/src/studio-api/helpers/sourceMutation.test.ts

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -368,13 +368,90 @@ describe("probeElementInSource", () => {
368368
// Covers the same surface as T3 (Studio sourcePatcher) — Core sourceMutation supports
369369
// all patch types (inline-style, attribute, text-content) via patchElementInHtml.
370370
describe("T7 — data-hf-id targeting (spec for R1)", () => {
371-
it.todo("updates inline style by data-hf-id when no HTML id attribute is present");
371+
it("updates inline style by data-hf-id when no HTML id attribute is present", () => {
372+
const source = `<h1 data-hf-id="hf-x7k2" style="color: red">Hello</h1>`;
373+
const { html, matched } = patchElementInHtml(source, { hfId: "hf-x7k2" }, [
374+
{ type: "inline-style", property: "color", value: "blue" },
375+
]);
376+
expect(matched).toBe(true);
377+
expect(html).toMatch(/color:\s*blue/);
378+
expect(html).toContain('data-hf-id="hf-x7k2"');
379+
});
372380

373-
it.todo("updates text content by data-hf-id");
381+
it("updates text content by data-hf-id", () => {
382+
const source = `<p data-hf-id="hf-a1b2">Old text</p>`;
383+
const { html, matched } = patchElementInHtml(source, { hfId: "hf-a1b2" }, [
384+
{ type: "text-content", property: "", value: "New text" },
385+
]);
386+
expect(matched).toBe(true);
387+
expect(html).toContain("New text");
388+
});
374389

375-
it.todo("updates attribute by data-hf-id");
390+
it("updates attribute by data-hf-id", () => {
391+
const source = `<div data-hf-id="hf-c3d4" data-start="0"></div>`;
392+
const { html, matched } = patchElementInHtml(source, { hfId: "hf-c3d4" }, [
393+
{ type: "attribute", property: "start", value: "2.5" },
394+
]);
395+
expect(matched).toBe(true);
396+
expect(html).toContain('data-start="2.5"');
397+
});
376398

377-
it.todo("data-hf-id attribute survives the patch (can be targeted again)");
399+
it("data-hf-id attribute survives the patch (can be targeted again)", () => {
400+
const source = `<h1 data-hf-id="hf-x7k2" style="color: red">Hello</h1>`;
401+
const { html } = patchElementInHtml(source, { hfId: "hf-x7k2" }, [
402+
{ type: "inline-style", property: "color", value: "blue" },
403+
]);
404+
expect(html).toContain('data-hf-id="hf-x7k2"');
405+
});
378406

379-
it.todo("hfId lookup falls through to selector when hfId is not found in the document");
407+
it("hfId lookup falls through to selector when hfId is not found in the document", () => {
408+
const source = `<h1 class="headline" style="color: red">Hello</h1>`;
409+
const { html, matched } = patchElementInHtml(
410+
source,
411+
{ hfId: "hf-missing", selector: ".headline" },
412+
[{ type: "inline-style", property: "color", value: "blue" }],
413+
);
414+
expect(matched).toBe(true);
415+
expect(html).toMatch(/color:\s*blue/);
416+
});
417+
418+
it("does not break out of the selector on a crafted hfId (CSS injection guard)", () => {
419+
// A value with a quote/bracket must be escaped, not injected — it should
420+
// simply match nothing and leave the source untouched, never throw.
421+
const source = `<h1 class="safe">A</h1><h1 class="victim">B</h1>`;
422+
const evil = `x"] , [class="victim`;
423+
const run = () =>
424+
patchElementInHtml(source, { hfId: evil }, [
425+
{ type: "text-content", property: "textContent", value: "HACKED" },
426+
]);
427+
expect(run).not.toThrow();
428+
const { html, matched } = run();
429+
expect(matched).toBe(false);
430+
expect(html).toBe(source);
431+
expect(html).not.toContain("HACKED");
432+
});
433+
434+
// The Studio edit path targets by id/selector (it never sends hfId). Once a
435+
// persisted data-hf-id exists in source, those edits must NOT strip it — else
436+
// the stable handle is destroyed by the next edit. This is the preservation
437+
// guarantee the write-back design depends on.
438+
it("preserves an existing data-hf-id when the element is patched by id", () => {
439+
const source = `<h1 id="hero" data-hf-id="hf-x7k2" style="color: red">Hello</h1>`;
440+
const { html, matched } = patchElementInHtml(source, { id: "hero" }, [
441+
{ type: "inline-style", property: "color", value: "blue" },
442+
]);
443+
expect(matched).toBe(true);
444+
expect(html).toMatch(/color:\s*blue/);
445+
expect(html).toContain('data-hf-id="hf-x7k2"');
446+
});
447+
448+
it("preserves an existing data-hf-id when the element is patched by selector", () => {
449+
const source = `<p class="body" data-hf-id="hf-a1b2">Old</p>`;
450+
const { html, matched } = patchElementInHtml(source, { selector: ".body" }, [
451+
{ type: "text-content", property: "textContent", value: "New" },
452+
]);
453+
expect(matched).toBe(true);
454+
expect(html).toContain("New");
455+
expect(html).toContain('data-hf-id="hf-a1b2"');
456+
});
380457
});

packages/core/src/studio-api/helpers/sourceMutation.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { parseHTML } from "linkedom";
22

33
export interface SourceMutationTarget {
44
id?: string | null;
5+
hfId?: string;
56
selector?: string;
67
selectorIndex?: number;
78
}
@@ -31,7 +32,41 @@ function querySelectorAllWithTemplates(root: Document | Element, selector: strin
3132
return [];
3233
}
3334

35+
// Prevent CSS attribute-selector injection via a crafted hfId: escape
36+
// backslashes first, then double-quotes. Keeps a malformed/hostile value from
37+
// breaking out of the `[data-hf-id="…"]` selector once callers beyond the
38+
// internal mint contract (R2+ user flows) pass values here.
39+
function escapeCssAttrValue(value: string): string {
40+
return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
41+
}
42+
43+
function findByHfId(document: Document, hfId: string): Element | null {
44+
try {
45+
const matches = querySelectorAllWithTemplates(
46+
document,
47+
`[data-hf-id="${escapeCssAttrValue(hfId)}"]`,
48+
);
49+
if (matches.length > 1) {
50+
// The mint contract guarantees uniqueness; a duplicate means upstream
51+
// id drift. Don't silently patch an arbitrary one — surface it.
52+
// eslint-disable-next-line no-console
53+
console.warn(
54+
`sourceMutation: data-hf-id "${hfId}" matched ${matches.length} elements; using the first. ids must be unique per document.`,
55+
);
56+
}
57+
return matches[0] ?? null;
58+
} catch {
59+
// Malformed selector despite escaping — let the caller fall back.
60+
return null;
61+
}
62+
}
63+
3464
function findTargetElement(document: Document, target: SourceMutationTarget): Element | null {
65+
if (target.hfId) {
66+
const el = findByHfId(document, target.hfId);
67+
if (el) return el;
68+
}
69+
3570
if (target.id) {
3671
const byId = document.getElementById(target.id);
3772
if (byId) return byId;
@@ -207,7 +242,7 @@ export function patchElementInHtml(
207242
}
208243

209244
export function probeElementInSource(source: string, target: SourceMutationTarget): boolean {
210-
if (!target.id && !target.selector) return false;
245+
if (!target.id && !target.hfId && !target.selector) return false;
211246
const { document } = parseSourceDocument(source);
212247
const el = findTargetElement(document, target);
213248
return el != null && isHTMLElement(el);

0 commit comments

Comments
 (0)