
Helper functions to interact with the HRM.

  1"""Helper functions to interact with the HRM."""
  3import os.path
  4import re
  5import shlex
  7from bs4 import BeautifulSoup
  8from loguru import logger as log
 11def parse_config(filename):
 12    """Assemble a dict from the HRM config file (shell syntax).
 14    Usually, the config is located at /etc/hrm.conf and written in shell syntax as
 15    this file simply gets sourced by the bash init script and other shell based
 16    tools.
 18    Parameters
 19    ----------
 20    filename : str
 21        The name of the configuration file to be parsed.
 23    Returns
 24    -------
 25    dict
 26        A dict with the parsed configuration items.
 28    Raises
 29    ------
 30    SyntaxError
 31        Raised in case the given configuration file can't be parsed correctly.
 33    Example
 34    -------
 35    >>> parse_config('/etc/hrm.conf')
 36    ... {
 37    ...     'HRM_DATA': '/export/hrm_data',
 38    ...     'HRM_DEST': 'dst',
 39    ...     'HRM_HOME': '/var/www/hrm',
 40    ...     'HRM_LOG': '/var/log/hrm',
 41    ...     'HRM_SOURCE': 'src',
 42    ...     'OMERO_HOSTNAME': 'omero.mynetwork.xy',
 43    ...     'OMERO_PKG': '/opt/OMERO/OMERO.server',
 44    ...     'OMERO_PORT': '4064',
 45    ...     'PHP_CLI': '/usr/local/php/bin/php',
 46    ...     'SUSER': 'hrm'
 47    ... }
 48    """
 49    log.debug(f"Trying to parse HRM configuration file [{filename}]...")
 50    config = dict()
 51    with open(filename, "r", encoding="utf-8") as file:
 52        body =
 54    lexer = shlex.shlex(body)
 55    lexer.wordchars += "-./"
 56    while True:
 57        token = lexer.get_token()
 58        if token is None or token == "":
 59            break
 60        # it's valid sh syntax to use a semicolon to join lines, so accept it:
 61        if token == ";":
 62            continue
 63        # we assume entries of the following form:
 64        # KEY="some-value"
 65        key = token
 66        try:
 67            equals = lexer.get_token()
 68            assert equals == "="
 69        except AssertionError:
 70            raise SyntaxError(  # pylint: disable-msg=raise-missing-from
 71                f"Can't parse {filename}, invalid syntax in line {lexer.lineno} "
 72                f"(expected '=', found '{equals}')."
 73            )
 74        except Exception as err:  # pragma: no cover # pylint: disable-msg=broad-except
 75            log.warning(f"Error parsing config: {err}")
 76        value = lexer.get_token()
 77        value = value.replace('"', "")  # remove double quotes
 78        value = value.replace("'", "")  # remove single quotes
 79        config[key] = value
 80"Successfully parsed [{}].", filename)
 81    return config
 84def check_config(config):
 85    """Check the config dict for required entries.
 87    Parameters
 88    ----------
 89    config : dict
 90        A dict with a parsed configuration, as returned by `parse_hrm_conf()`.
 92    Raises
 93    ------
 94    SyntaxError
 95        Raised in case one of the required configuration items is missing.
 96    """
 97    required = ["OMERO_PKG", "OMERO_HOSTNAME"]
 98    for entry in required:
 99        if entry not in config:
100            raise SyntaxError(f"Missing '{entry}'' in the HRM config file.")
101    log.debug("HRM config file passed all checks.")
def job_parameter_summary(fname):
    """Render the HRM parameter summary of a job as plain text.

    The HTML parameter summary generated by the HRM is parsed using
    `parse_summary()`, then each parsed section is turned into a title line, a
    separator line and one `name: value` line per parameter.

    Parameters
    ----------
    fname : str
        The filename of the job's HTML parameter summary.

    Returns
    -------
    str or None
        The formatted string containing the parameter summary, or `None` in case
        `parse_summary()` didn't return any results.
    """
    sections = parse_summary(fname)
    if sections is None:
        return None

    chunks = []
    for title, parameters in sections.items():
        # section title followed by a separator line:
        chunks.append(f"{title}\n==============================\n")
        # one line per parameter of this section:
        chunks.extend(f"{name}: {value}\n" for name, value in parameters.items())
    summary = "".join(chunks)

    log.debug(f"Job parameter summary:\n---\n{summary}---")
    log.success("Generated parameter summary.")
    return summary
def parse_job_basename(fname):
    """Strip everything following the HRM job label from a result file name.

    HRM job IDs are generated via PHP's `uniqid()` call that is giving a 13-digit
    hexadecimal string (8 digits UNIX time and 5 digits microseconds). The HRM
    labels its result files by appending an underscore (`_`) followed by this ID
    and an `_hrm` suffix. This function looks for such a label that is followed by
    a dot and drops the dot and everything *after* it.

    The intention is to safely remove the suffix from an image file name without
    making assumptions on what the suffix looks like (it could e.g. be `.ics`,
    `.ome.tif` or similar).

    Parameters
    ----------
    fname : str
        The input string, usually the name of an HRM result file (but any string
        is accepted).

    Returns
    -------
    str
        The input string (`fname`) where everything *after* an HRM-like job label
        (e.g. `_abcdef0123456_hrm` or `_f435a27b9c85e_hrm`) is removed. In case the
        input string does *not* contain a matching section it is returned
        unchanged.
    """
    log.trace(fname)
    # group 1 captures the job label itself, the remainder of the pattern matches
    # the dot and whatever is following it:
    job_label = re.compile(r"(_[0-9a-f]{13}_hrm)\..*")
    basename = job_label.sub(r"\1", fname)
    log.trace(basename)
    return basename
def parse_summary(fname):
    """Parse the job parameter summary generated by HRM into a dict.

    Parse the HTML file generated by the HRM containing the parameter summary and
    generate a nested dict from it. The HTML file is assumed to contain three `<table>`
    items that contain a single `<td class="header">` item with the title and a `<tr>`
    section with four `<td>` items per parameter (being *parameter-name*, *channel*,
    *source* and *value*), e.g. something of this form:

    ```
    _____________________________________________
    |___________________title___________________|
    |_________________(ignored)_________________|
    | parameter-name | channel | source | value |
    ...
    | parameter-name | channel | source | value |
    ---------------------------------------------
    ```

    Tables that don't have a header cell in their first row are skipped.

    Parameters
    ----------
    fname : str
        The filename of the job's HTML parameter summary or (e.g.) the resulting image
        file. In case `fname` doesn't end in the common parameter summary suffix (for
        example if the image file name was provided), the function tries to derive the
        name of summary file and use that one for parsing.

    Returns
    -------
    dict(dict) or None
        A dict with the parsed section names (table titles) being the keys, each
        containing another dict with the parameter names as keys (including the channel
        unless the parameter is channel-independent). See the example below. In case
        the file can't be opened or parsed, `None` is returned.

    Raises
    ------
    KeyError
        Raised in case a table header or a parameter name (within one table) is
        found more than once.

    Example
    -------
    >>> parse_summary('image_001.parameters.txt')
    ... {
    ...     "Image Parameters": {
    ...         "Emission wavelength (nm) [ch:0]": "567.000",
    ...         "Excitation wavelength (nm) [ch:0]": "456.000",
    ...         "Lens refractive index [ch:0]": "4.567",
    ...         "Microscope type [ch:0]": "widefield",
    ...         "Numerical aperture [ch:0]": "2.345",
    ...         "Point Spread Function": "theoretical",
    ...         "Sample refractive index [ch:0]": "3.456",
    ...         "Time interval (s)": "1.000000",
    ...         "X pixel size (μm)": "0.123456",
    ...         "Y pixel size (μm)": "0.123456",
    ...         "Z step size (μm)": "0.234567",
    ...     },
    ...     "Restoration Parameters": {
    ...         "Autocrop": "no",
    ...         "Background estimation": "auto",
    ...         "Deconvolution algorithm": "iiff",
    ...         "Number of iterations": "42",
    ...         "Quality stop criterion": "0.000007",
    ...         "Signal/Noise ratio [ch:0]": "99",
    ...     },
    ... }
    """
    # In case `fname` doesn't end with the common suffix for job summary files check if
    # it is the actual *image* filename of an HRM job and try to use the corresponding
    # parameter summary file instead:
    suffix = ".parameters.txt"
    if not fname.endswith(suffix):
        candidate = parse_job_basename(fname) + suffix
        if os.path.exists(candidate):
            log.debug(f"Found [{candidate}], will use it instead of [{fname}].")
            fname = candidate

    log.debug(f"Trying to parse job parameter summary file [{fname}]...")

    try:
        with open(fname, "r", encoding="utf-8") as soupfile:
            soup = BeautifulSoup(soupfile, features="html.parser")
            log.trace(f"BeautifulSoup successfully parsed [{fname}].")
    except IOError as err:
        log.error(f"Unable to open parameter summary file [{fname}]: {err}")
        return None
    except Exception as err:  # pragma: no cover  # pylint: disable-msg=broad-except
        log.error(f"Parsing summary file [{fname}] failed: {err}")
        return None

    sections = {}  # job parameter summaries have multiple sections split by headers
    total_rows = 0  # rows of all tables that were actually parsed (for logging)
    for table in soup.find_all("table"):
        log.trace("Parsing table header...")
        rows = table.find_all("tr")
        try:
            # an IndexError here means the table has no rows at all or its first
            # row doesn't contain a header cell:
            header = rows[0].find_all("td", class_="header")[0].text
        except IndexError:
            log.debug("Skipping table entry that doesn't have a header.")
            continue
        log.trace(f"Parsed table header: {header}")
        if header in sections:
            raise KeyError(f"Error parsing parameters, duplicate header: {header}")

        pairs = {}
        # and the table body, starting from the 3rd <tr> item:
        for row in rows[2:]:
            cols = row.find_all("td")
            # parse the parameter "name":
            param_key = cols[0].text
            log.trace(f"Parsed (raw) key name: {param_key}")
            # replace HTML-encoded chars:
            param_key = param_key.replace("&mu;m", "µm")

            # parse the channel and add it to the key-string (unless it's "All"):
            channel = cols[1].text
            if channel != "All":
                param_key += f" [ch:{channel}]"

            # parse the parameter value (cols[2] is the "source", it is not used):
            param_value = cols[3].text

            # finally add a new entry to the dict unless the key already exists:
            if param_key in pairs:
                raise KeyError(f"Parsing failed, duplicate parameter: {param_key}")
            pairs[param_key] = param_value
        sections[header] = pairs
        total_rows += len(rows)

    log.success(f"Processed {total_rows} table rows.")
    return sections
