| |
| """ |
| finalize_ts.py — Add tracked-change metadata updates to a TS DOCX after CR application. |
| |
| Three edits are made (all as tracked changes): |
| 1. New row in the Change History table (found by its "Change history" title row; usually the second-to-last table, Annex V) |
| 2. New row in the History table (last table, last page) |
| 3. Version + date update in the first paragraph (title) |
| |
| Usage: |
| python3 finalize_ts.py <ts_docx> <cr_docx> [--author "Name"] [--output <path>] |
| """ |
|
|
| import argparse |
| import re |
| import sys |
| from datetime import date, timedelta |
| from pathlib import Path |
|
|
| import docx |
|
|
| sys.path.insert(0, str(Path(__file__).parent)) |
| from docx_helpers import ( |
| RevCounter, |
| tracked_insert_table_row, |
| tracked_modify_para_multi, |
| AUTHOR, |
| DATE, |
| ) |
| from fetch_crs import _find_cover_table |
|
|
|
|
| |
|
|
def to_wsl_path(p: str) -> str:
    """Convert a Windows drive path to its WSL mount path.

    A path that starts with a drive letter, a colon and a backslash or
    forward slash (``C:\\...``, ``e:/...``) is rewritten to
    ``/mnt/<drive>/...``: the drive letter is lower-cased and every
    backslash becomes a forward slash.  Any other path is returned
    unchanged.

    Args:
        p: Path string, typically taken from the command line.

    Returns:
        The converted path, or ``p`` itself when it is not a drive path.
    """
    # Accept every ASCII drive letter and both separator styles rather than
    # only the C:\ and D:\ prefixes.
    if re.match(r'[A-Za-z]:[\\/]', p) is None:
        return p
    drive = p[0].lower()
    rest = p[2:].replace('\\', '/')
    return f'/mnt/{drive}{rest}'
|
|
|
|
| |
|
|
def compute_pub_date(today=None):
    """Return the publication month as ``(yyyy-mm, "Month YYYY")``.

    The 5-day rule is applied: when the reference date is 5 days or fewer
    before the first day of the next month, the next month is used;
    otherwise the current month is used.

    Args:
        today: Reference ``datetime.date``.  Defaults to ``date.today()``;
            pass an explicit date for reproducible runs or tests.

    Returns:
        A 2-tuple of strings, e.g. ``('2024-02', 'February 2024')``.  The
        month name comes from ``strftime('%B')``.
    """
    if today is None:
        today = date.today()
    month_start = today.replace(day=1)
    # Adding 32 days to the 1st always lands in the following month;
    # snapping back to day 1 gives the first day of that month.
    first_next = (month_start + timedelta(days=32)).replace(day=1)
    days_until = (first_next - today).days
    target = first_next if days_until <= 5 else month_start
    return target.strftime('%Y-%m'), target.strftime('%B %Y')
|
|
|
|
def derive_new_version(v: str) -> str:
    """Increment the middle component of a version: X.Y.Z -> X.(Y+1).0.

    Surrounding whitespace is ignored.  A version with fewer than three
    components is padded with ``'0'`` before incrementing, so ``'17'``
    becomes ``'17.1.0'``.

    Args:
        v: Dotted version string such as ``'17.3.0'``.

    Returns:
        The version with the second component incremented and the third
        component reset to ``'0'``.

    Raises:
        ValueError: If the first component is empty (e.g. an empty string)
            or the second component is not an integer.
    """
    parts = v.strip().split('.')
    if not parts[0]:
        # An empty version used to produce the meaningless result '.1.0'.
        raise ValueError(f'Cannot derive a new version from {v!r}: version is empty')
    # Multiplying by a non-positive count yields [], so longer versions are untouched.
    parts += ['0'] * (3 - len(parts))
    try:
        minor = int(parts[1])
    except ValueError:
        raise ValueError(
            f'Cannot derive a new version from {v!r}: '
            f'second component {parts[1]!r} is not an integer'
        ) from None
    parts[1] = str(minor + 1)
    parts[2] = '0'
    return '.'.join(parts)
|
|
|
|
| |
|
|
def _parse_meeting_id(cells):
    """Return '<body>-<number>' from the first cell mentioning 'Meeting #', else ''."""
    meeting_text = next(
        (c for c in cells if 'Meeting #' in c or 'Meeting#' in c),
        '',
    )
    if not meeting_text:
        return ''
    # The body is the word immediately before "Meeting"; the number follows '#'.
    body_match = re.search(r'(\w+)\s+Meeting\s*#', meeting_text)
    num_match = re.search(r'Meeting\s*#\s*(\d+)', meeting_text)
    body = body_match.group(1) if body_match else ''
    number = num_match.group(1) if num_match else ''
    # Fall back to the raw cell text when either part could not be parsed.
    return f'{body}-{number}' if body and number else meeting_text


def _parse_uid(cells):
    """Return the first 'LETTERS(digits)digits...' token found in any cell, else ''."""
    uid_pat = re.compile(r'[A-Z]+\(\d+\)\d+\S*')
    for text in cells:
        m = uid_pat.search(text)
        if m:
            return m.group(0)
    return ''


def _parse_labelled_fields(cells):
    """Return the label-driven fields (title, cr_num, rev, cat, current_version) that were found."""
    label_map = {
        'CR': 'cr_num',
        'Rev': 'rev',
        'Curr. vers': 'current_version',
        'Current version': 'current_version',
        'Cat': 'cat',
        'Category': 'cat',
    }
    found = {}
    title_next = False
    for i, text in enumerate(cells):
        stripped = text.rstrip(':')

        # A title label means the next non-empty cell is the title.
        if stripped.lower() in ('title', 'title of change'):
            title_next = True
            continue
        if title_next:
            if text:
                found['title'] = text
                title_next = False
            continue

        for label, key in label_map.items():
            # Prefix match, case-sensitive; the first label in map order wins.
            if stripped.startswith(label):
                # The value is the first non-empty cell among the next three.
                for candidate in cells[i + 1:i + 4]:
                    if candidate:
                        found[key] = candidate
                        break
                break
    return found


def extract_cr_metadata(cr_docx_path: str) -> dict:
    """Read CR metadata from the cover table of a CR DOCX.

    The cover table is the one returned by ``_find_cover_table``.  Its cell
    texts are flattened row by row into one list, which is then scanned for
    the meeting identifier, the document UID and the labelled fields.

    Args:
        cr_docx_path: Path of the CR DOCX file.

    Returns:
        Dict with the keys ``meeting_id``, ``uid``, ``cr_num``, ``rev``,
        ``cat``, ``title`` and ``current_version``.  A field that could not
        be found is an empty string.

    Raises:
        ValueError: If the document has no tables or no cover table is found.
    """
    doc = docx.Document(cr_docx_path)
    if not doc.tables:
        raise ValueError('CR has no tables — cannot extract metadata')

    tbl = _find_cover_table(doc)
    if tbl is None:
        raise ValueError('CR cover table not found — no table containing "CHANGE REQUEST"')

    # NOTE(review): python-docx may report a merged cell once per grid column
    # it spans, so the flattened list can contain repeated texts — confirm a
    # repeated label is never picked up as that label's value.
    cells = [cell.text.strip() for row in tbl.rows for cell in row.cells]

    meta = {
        'meeting_id': _parse_meeting_id(cells),
        'uid': _parse_uid(cells),
        'cr_num': '',
        'rev': '',
        'cat': '',
        'title': '',
        'current_version': '',
    }
    meta.update(_parse_labelled_fields(cells))
    return meta
|
|
|
|
| |
|
|
def _detect_meeting_separator(tbl):
    """Detect the separator used between meeting body and number in a table.

    The meeting column defaults to index 1.  If a cell in the header row
    (the second row, or the only row when there is just one) contains
    'meeting', 'body' or 'tsg' (case-insensitive), the first such cell's
    index is used instead.  The column is then scanned bottom-up, and the
    first cell where a letter, one non-alphanumeric character and a digit
    appear in sequence determines the result, e.g. '#' for 'SET#115' or
    '-' for 'SET-119'.

    Args:
        tbl: Table object exposing ``rows``, each row exposing ``cells``
            whose items have a ``text`` attribute.

    Returns:
        The detected separator character, or '#' when the table has no
        rows or no cell in the column matches.
    """
    rows = tbl.rows
    if len(rows) == 0:
        # Nothing to inspect; previously this raised IndexError.
        return '#'

    meet_col = 1
    header_row = rows[1] if len(rows) > 1 else rows[0]
    for c_idx, cell in enumerate(header_row.cells):
        header_text = cell.text.lower()
        if any(kw in header_text for kw in ('meeting', 'body', 'tsg')):
            meet_col = c_idx
            break

    sep_pat = re.compile(r'[A-Za-z]([^A-Za-z0-9])\d')
    for row in reversed(rows):
        cells = row.cells
        if len(cells) <= meet_col:
            continue
        # Empty or non-matching cells are skipped and the scan continues upward.
        m = sep_pat.search(cells[meet_col].text.strip())
        if m:
            return m.group(1)
    return '#'
|
|
|
|
| |
|
|
class NoChangeHistoryTable(Exception):
    """Signals that no table in the document could be recognised as the Change History table."""
|
|
|
|
def find_change_history_table(ts_doc):
    """Return the first table whose first row contains "Change history".

    The stripped texts of the first row's cells are joined with spaces and
    checked (case-sensitively) for the substring "Change history".  Tables
    without rows are skipped.

    Raises:
        NoChangeHistoryTable: If no table in ``ts_doc.tables`` qualifies.
    """
    def _first_row_text(table):
        return ' '.join(cell.text.strip() for cell in table.rows[0].cells)

    match = next(
        (
            candidate
            for candidate in ts_doc.tables
            if candidate.rows and 'Change history' in _first_row_text(candidate)
        ),
        None,
    )
    if match is None:
        raise NoChangeHistoryTable(
            'No Change History table found in this document '
            '(no table whose first row contains "Change history")'
        )
    return match
|
|
|
|
def find_history_table(ts_doc):
    """Return the last table of the document (the History table).

    The table is accepted only if its last row has exactly 3 cells.

    Args:
        ts_doc: Document object exposing ``tables``.

    Returns:
        ``ts_doc.tables[-1]``.

    Raises:
        ValueError: If the document has no tables, the last table has no
            rows, or its last row does not have exactly 3 cells.
    """
    tables = ts_doc.tables
    if not tables:
        # Report a clear validation error instead of a bare IndexError.
        raise ValueError('Document has no tables - History table not found')
    tbl = tables[-1]
    rows = tbl.rows
    if len(rows) == 0:
        raise ValueError('History table has no rows')
    ncols = len(rows[-1].cells)
    if ncols != 3:
        raise ValueError(
            f'History table has {ncols} columns, expected 3'
        )
    return tbl
|
|
|
|
| |
|
|
def update_change_history_table(ts_doc, meta, pub_yyyy_mm, old_v, new_v, rev, author, date_str):
    """Insert a tracked row describing the CR into the Change History table.

    The column count is taken from the header row (the second row, because
    the first row is the "Change history" title row; the only row when the
    table has just one).  The row layout depends on that count:

    * 8 columns, first header cell containing "Date"/"date":
      date, meeting, uid, CR, rev, cat, title, new version.
    * 8 columns otherwise:
      meeting, uid, an empty cell, CR, rev, cat, title, new version.
    * any other count: date, meeting, uid, CR, rev, cat, title, old
      version, new version, truncated to the column count (9 columns get
      the full list).

    The meeting identifier's '-' is replaced by the separator already used
    in the table, as reported by ``_detect_meeting_separator``.

    Args:
        ts_doc: TS document to modify.
        meta: Dict with the keys ``meeting_id``, ``uid``, ``cr_num``,
            ``rev``, ``cat`` and ``title``.
        pub_yyyy_mm: Publication month string for the date column.
        old_v: Version before the change.
        new_v: Version after the change.
        rev: Revision counter passed through to ``tracked_insert_table_row``.
        author: Tracked-change author passed through.
        date_str: Tracked-change date passed through.

    Returns:
        The list of cell texts that was inserted.

    Raises:
        NoChangeHistoryTable: Propagated from ``find_change_history_table``.
    """
    tbl = find_change_history_table(ts_doc)
    rows = tbl.rows
    header_row = rows[1] if len(rows) > 1 else rows[0]
    header_cells = header_row.cells
    ncols = len(header_cells)

    # Keep the meeting notation consistent with the rows already in the table.
    sep = _detect_meeting_separator(tbl)
    meeting_id = meta['meeting_id']
    if sep != '-' and '-' in meeting_id:
        body, number = meeting_id.split('-', 1)
        meeting_id = f'{body}{sep}{number}'

    cr_fields = [meta['cr_num'], meta['rev'], meta['cat'], meta['title']]

    if ncols == 8:
        # Inspect the header row, not rows[0]: rows[0] is the merged
        # "Change history" title, which never contains "Date", so the date
        # layout could not be selected.
        first_header = header_cells[0].text.strip()
        if re.search(r'[Dd]ate', first_header):
            cell_texts = [pub_yyyy_mm, meeting_id, meta['uid'], *cr_fields, new_v]
        else:
            cell_texts = [meeting_id, meta['uid'], '', *cr_fields, new_v]
    else:
        full_row = [pub_yyyy_mm, meeting_id, meta['uid'], *cr_fields, old_v, new_v]
        cell_texts = full_row[:ncols]

    tracked_insert_table_row(tbl, cell_texts, rev, author, date_str)
    return cell_texts
|
|
|
|
def update_history_table(ts_doc, new_v, pub_month_year, rev, author, date_str):
    """Insert a tracked 'V<new_v> / <pub_month_year> / Publication' row into the History table.

    The table comes from ``find_history_table``; the inserted cell texts are
    returned.
    """
    history_tbl = find_history_table(ts_doc)
    new_row = [
        f'V{new_v}',
        pub_month_year,
        'Publication',
    ]
    tracked_insert_table_row(history_tbl, new_row, rev, author, date_str)
    return new_row
|
|
|
|
def update_title_para(ts_doc, old_v, new_v, old_date_str, new_date_str, rev, author, date_str):
    """Replace version and date in the document's first paragraph.

    Two replacements are requested: ``V<old_v>`` -> ``V<new_v>`` and
    ``(<old_date_str>)`` -> ``(<new_date_str>)``.  Both are passed together
    in a single call to ``tracked_modify_para_multi``.
    """
    title_para = ts_doc.paragraphs[0]
    version_swap = (f'V{old_v}', f'V{new_v}')
    date_swap = (f'({old_date_str})', f'({new_date_str})')
    tracked_modify_para_multi(
        title_para,
        [version_swap, date_swap],
        rev,
        author,
        date_str,
    )
|
|
|
|
| |
|
|
def main():
    """Command-line entry point: apply the three metadata edits and save.

    Steps:
      1. Parse arguments and convert Windows paths with ``to_wsl_path``.
      2. Read the CR metadata and derive the new version and publication date.
      3. Insert a row in the Change History table and in the History table.
      4. Update version and date in the title paragraph, if a ``(YYYY-MM)``
         date is found there.
      5. Save the result (default: ``<ts>_finalized.docx`` next to the input).

    Exits with an error message when the CR cover table does not provide the
    current version, because no new version can be derived without it.
    """
    parser = argparse.ArgumentParser(
        description='Add tracked-change metadata updates to a TS DOCX after CR application.'
    )
    parser.add_argument('ts_docx', help='TS DOCX file to update')
    parser.add_argument('cr_docx', help='CR DOCX file to read metadata from')
    parser.add_argument('--author', default=AUTHOR, help='Tracked change author name')
    parser.add_argument('--output', default=None, help='Output path (default: <ts>_finalized.docx)')
    args = parser.parse_args()

    ts_path = to_wsl_path(args.ts_docx)
    cr_path = to_wsl_path(args.cr_docx)

    if args.output:
        out_path = to_wsl_path(args.output)
    else:
        p = Path(ts_path)
        out_path = str(p.parent / (p.stem + '_finalized.docx'))

    print(f'TS: {ts_path}')
    print(f'CR: {cr_path}')
    print(f'Output: {out_path}')
    print()

    # Only the TS is opened here; extract_cr_metadata opens the CR itself.
    ts_doc = docx.Document(ts_path)

    print('Extracting CR metadata...')
    meta = extract_cr_metadata(cr_path)
    print(f" Meeting ID: {meta['meeting_id']}")
    print(f" UID: {meta['uid']}")
    print(f" CR#: {meta['cr_num']}")
    print(f" Rev: {meta['rev']}")
    print(f" Category: {meta['cat']}")
    print(f" Title: {meta['title']}")
    print(f" Current version: {meta['current_version']}")
    print()

    pub_ym, pub_month_year = compute_pub_date()
    old_v = meta['current_version']
    if not old_v:
        # Without a current version the derived version would be meaningless
        # and would be written into the document.
        sys.exit('ERROR: current version not found in the CR cover table; '
                 'cannot derive the new version')
    new_v = derive_new_version(old_v)
    print(f'Old version: {old_v} → New version: {new_v}')
    print(f'Publication: {pub_month_year} ({pub_ym})')
    print()

    title_text = ts_doc.paragraphs[0].text
    date_match = re.search(r'\((\d{4}-\d{2})\)', title_text)
    if not date_match:
        print('WARNING: Could not find date pattern (YYYY-MM) in first paragraph:')
        print(f' {title_text!r}')
        old_date_str = ''
    else:
        old_date_str = date_match.group(1)
        print(f'Title paragraph: {title_text!r}')
        print(f'Old date: {old_date_str} → New date: {pub_ym}')
    print()

    rev = RevCounter(ts_doc)
    tc_date = DATE

    print('Inserting row in Change History table (Annex V)...')
    ch_cells = update_change_history_table(ts_doc, meta, pub_ym, old_v, new_v, rev, args.author, tc_date)
    print(f' Row: {ch_cells}')

    print('Inserting row in History table (last page)...')
    h_cells = update_history_table(ts_doc, new_v, pub_month_year, rev, args.author, tc_date)
    print(f' Row: {h_cells}')

    if old_date_str:
        print('Updating title paragraph...')
        update_title_para(ts_doc, old_v, new_v, old_date_str, pub_ym, rev, args.author, tc_date)
        print(f' V{old_v} → V{new_v}, ({old_date_str}) → ({pub_ym})')
    else:
        print('Skipping title paragraph update (no date found).')

    ts_doc.save(out_path)
    print()
    print(f'Saved: {out_path}')
    print()
    print('Summary of tracked changes:')
    print(f' [Change History] New row: {ch_cells}')
    print(f' [History] New row: {h_cells}')
    if old_date_str:
        print(f' [Title] V{old_v} → V{new_v}, ({old_date_str}) → ({pub_ym})')
|
|
|
|
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
|