Skip to content

Commit dfb10a9

Browse files
committed
Add API endpoint for background job progress
1 parent ca1cbe7 commit dfb10a9

File tree

4 files changed

+85
-2
lines changed

4 files changed

+85
-2
lines changed

backend/btrixcloud/background_jobs.py

+58
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
StorageRef,
2727
User,
2828
SuccessResponse,
29+
JobProgress,
2930
)
3031
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
3132
from .utils import dt_now
@@ -463,6 +464,52 @@ def _get_job_by_type_from_data(self, data: dict[str, object]):
463464

464465
return DeleteOrgJob.from_dict(data)
465466

467+
async def get_job_progress(self, job_id: str) -> JobProgress:
    """Return progress of background job for supported types

    Only copy-bucket jobs are supported. A finished job reports a
    percentage of 1.0; otherwise progress is read from the newest stats
    line in the tail of the job pod's log. A stats line is expected to be
    a comma-separated list of groups containing a "<number>%" group and an
    "ETA <value>" group (presumably rclone's one-line stats output;
    confirm against the job's command line).

    Raises:
        HTTPException: 403 "job_type_not_supported" for any other job
            type, 400 "job_failed" if the job is marked unsuccessful,
            400 "job_log_not_available" if no log output could be read.
    """
    job = await self.get_background_job(job_id)

    if job.type != BgJobType.COPY_BUCKET:
        raise HTTPException(status_code=403, detail="job_type_not_supported")

    if job.success is False:
        raise HTTPException(status_code=400, detail="job_failed")

    if job.finished:
        return JobProgress(percentage=1.0)

    log_tail = await self.crawl_manager.tail_background_job(job_id)
    if not log_tail:
        raise HTTPException(status_code=400, detail="job_log_not_available")

    # Walk the log newest-first and stop at the first line with usable stats
    for line in reversed(log_tail.splitlines()):
        if "ETA" not in line:
            continue

        # Parse into locals so that a malformed line leaves nothing behind
        percentage = 0.0
        eta = None

        try:
            for group in line.split(","):
                group = group.strip()
                if "%" in group:
                    percentage = float(group.strip("%")) / 100
                if "ETA" in group:
                    # Keep only the first token after the "ETA" label; the
                    # stats line sometimes carries extra text after it
                    eta_tokens = group.partition("ETA")[2].split()
                    eta = eta_tokens[0] if eta_tokens else None
        except ValueError:
            # Percentage was not a number: fall back to the next older line
            continue

        return JobProgress(percentage=percentage, eta=eta)

    # No stats line in the tail yet
    return JobProgress(percentage=0.0)
512+
466513
async def list_background_jobs(
467514
self,
468515
org: Organization,
@@ -672,6 +719,17 @@ async def get_background_job(
672719
"""Retrieve information for background job"""
673720
return await ops.get_background_job(job_id, org.id)
674721

722+
@router.get(
    "/{job_id}/progress",
    response_model=JobProgress,
)
async def get_job_progress(
    job_id: str,
    org: Organization = Depends(org_crawl_dep),
):
    """Return progress information for background job"""
    # Resolve the job within the requesting org first, the same way the
    # plain job lookup route does, so that a job id belonging to another
    # org is not reported on.
    # NOTE(review): relies on ops.get_background_job rejecting a job that
    # does not match the given org id -- confirm against its implementation
    await ops.get_background_job(job_id, org.id)

    return await ops.get_job_progress(job_id)
732+
675733
@app.get("/orgs/all/jobs/{job_id}", response_model=AnyJob, tags=["jobs"])
676734
async def get_background_job_all_orgs(job_id: str, user: User = Depends(user_dep)):
677735
"""Get background job from any org"""

backend/btrixcloud/crawlmanager.py

+16
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,22 @@ async def delete_crawl_config_by_id(self, cid: str) -> None:
344344
"""Delete all crawl configs by id"""
345345
await self._delete_crawl_configs(f"btrix.crawlconfig={cid}")
346346

347+
async def tail_background_job(self, job_id: str, tail_lines: int = 10) -> str:
    """Tail log of the newest pod belonging to a background job

    Pods are looked up by the "batch.kubernetes.io/job-name" label in this
    manager's namespace.

    Args:
        job_id: Name of the background job whose pod log is read.
        tail_lines: Number of lines to read from the end of the log.

    Returns:
        The requested log tail, or an empty string if the job has no pods.
    """
    pods = await self.core_api.list_namespaced_pod(
        namespace=self.namespace,
        label_selector=f"batch.kubernetes.io/job-name={job_id}",
    )

    if not pods.items:
        return ""

    def created_at(pod) -> float:
        """Return pod creation time as epoch seconds, 0.0 if unset"""
        timestamp = pod.metadata.creation_timestamp
        return timestamp.timestamp() if timestamp else 0.0

    # A retried job owns one pod per attempt; the list order is not
    # guaranteed, so pick the most recently created pod explicitly
    latest_pod = max(pods.items, key=created_at)

    return await self.core_api.read_namespaced_pod_log(
        latest_pod.metadata.name, self.namespace, tail_lines=tail_lines
    )
362+
347363
# ========================================================================
348364
# Internal Methods
349365
async def _delete_crawl_configs(self, label) -> None:

backend/btrixcloud/models.py

+8
Original file line numberDiff line numberDiff line change
@@ -2099,6 +2099,14 @@ class CopyBucketJob(BackgroundJob):
20992099
]
21002100

21012101

2102+
# ============================================================================
2103+
class JobProgress(BaseModel):
    """Progress report for a background job

    Carries how far along the job is and, when one could be read from the
    job's output, an estimate of the time remaining.
    """

    # Completed share of the job as a fraction (1.0 means finished)
    percentage: float

    # Estimated time remaining as reported by the job; None when unknown
    eta: Optional[str] = None
2108+
2109+
21022110
# ============================================================================
21032111

21042112
### PAGES ###

chart/app-templates/copy_job.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ kind: Job
33
metadata:
44
name: "{{ id }}"
55
labels:
6+
job-id: "{{ id }}"
67
role: "background-job"
78
job_type: {{ job_type }}
89
btrix.org: {{ oid }}
910

1011
spec:
11-
ttlSecondsAfterFinished: 60
12+
ttlSecondsAfterFinished: 30
1213
backoffLimit: 3
1314
template:
1415
spec:
@@ -86,7 +87,7 @@ spec:
8687
- name: RCLONE_CONFIG_NEW_ENDPOINT
8788
value: "{{ new_endpoint }}"
8889

89-
command: ["rclone", "-vv", "--progress", "copy", "--checksum", "--use-mmap", "--transfers=2", "--checkers=2", "prev:{{ prev_bucket }}{{ oid }}", "new:{{ new_bucket }}{{ oid }}"]
90+
command: ["rclone", "-v", "--stats-one-line", "--stats", "10s", "copy", "--checksum", "--use-mmap", "--transfers=2", "--checkers=2", "prev:{{ prev_bucket }}{{ oid }}", "new:{{ new_bucket }}{{ oid }}"]
9091
resources:
9192
limits:
9293
memory: "350Mi"

0 commit comments

Comments
 (0)