hrm_omero.hrm
Helper functions to interact with the HRM.
"""Helper functions to interact with the HRM."""

import os.path
import re
import shlex

from bs4 import BeautifulSoup
from loguru import logger as log


def parse_config(filename):
    """Assemble a dict from the HRM config file (shell syntax).

    Usually, the config is located at /etc/hrm.conf and written in shell syntax as
    this file simply gets sourced by the bash init script and other shell based
    tools.

    Parameters
    ----------
    filename : str
        The name of the configuration file to be parsed.

    Returns
    -------
    dict
        A dict with the parsed configuration items.

    Raises
    ------
    SyntaxError
        Raised in case the given configuration file can't be parsed correctly.

    Example
    -------
    >>> parse_config('/etc/hrm.conf')
    ... {
    ...     'HRM_DATA': '/export/hrm_data',
    ...     'HRM_DEST': 'dst',
    ...     'HRM_HOME': '/var/www/hrm',
    ...     'HRM_LOG': '/var/log/hrm',
    ...     'HRM_SOURCE': 'src',
    ...     'OMERO_HOSTNAME': 'omero.mynetwork.xy',
    ...     'OMERO_PKG': '/opt/OMERO/OMERO.server',
    ...     'OMERO_PORT': '4064',
    ...     'PHP_CLI': '/usr/local/php/bin/php',
    ...     'SUSER': 'hrm'
    ... }
    """
    log.debug(f"Trying to parse HRM configuration file [{filename}]...")
    config = {}
    with open(filename, "r", encoding="utf-8") as file:
        body = file.read()

    lexer = shlex.shlex(body)
    # allow dashes, dots and slashes inside unquoted tokens (paths, hostnames):
    lexer.wordchars += "-./"
    while True:
        token = lexer.get_token()
        if not token:  # the lexer signals the end of the input
            break
        # it's valid sh syntax to use a semicolon to join lines, so accept it:
        if token == ";":
            continue
        # we assume entries of the following form:
        # KEY="some-value"
        key = token
        try:
            equals = lexer.get_token()
        except ValueError as err:
            # the lexer itself gave up (e.g. on an unterminated quotation)
            raise SyntaxError(
                f"Can't parse {filename}, invalid syntax in line {lexer.lineno} "
                f"({err})."
            ) from err
        # explicit check instead of `assert`, which is stripped by `python -O`:
        if equals != "=":
            raise SyntaxError(
                f"Can't parse {filename}, invalid syntax in line {lexer.lineno} "
                f"(expected '=', found '{equals}')."
            )
        value = lexer.get_token()
        value = value.replace('"', "")  # remove double quotes
        value = value.replace("'", "")  # remove single quotes
        config[key] = value
    log.info("Successfully parsed [{}].", filename)
    return config


def check_config(config):
    """Check the config dict for required entries.

    Parameters
    ----------
    config : dict
        A dict with a parsed configuration, as returned by `parse_config()`.

    Raises
    ------
    SyntaxError
        Raised in case one of the required configuration items is missing.
    """
    required = ["OMERO_PKG", "OMERO_HOSTNAME"]
    for entry in required:
        if entry not in config:
            raise SyntaxError(f"Missing '{entry}' in the HRM config file.")
    log.debug("HRM config file passed all checks.")


def job_parameter_summary(fname):
    """Generate a parameter summary text from the HRM-generated HTML file.

    Call the parser for the HTML file generated by the HRM containing the parameter
    summary and generate a plain-text version from the parsed results.

    Parameters
    ----------
    fname : str
        The filename of the job's HTML parameter summary.

    Returns
    -------
    str or None
        The formatted string containing the parameter summary, or `None` in case
        `parse_summary()` returned `None`.
    """
    parsed = parse_summary(fname)
    if parsed is None:
        return None

    # collect all fragments first and join them once at the end:
    fragments = []
    for section, parameters in parsed.items():
        fragments.append(f"{section}\n==============================\n")
        fragments.extend(f"{name}: {value}\n" for name, value in parameters.items())
    summary = "".join(fragments)
    log.debug(f"Job parameter summary:\n---\n{summary}---")
    log.success("Generated parameter summary.")
    return summary


def parse_job_basename(fname):
    """Parse the basename from an HRM job result file name.

    HRM job IDs are generated via PHP's `uniqid()` call that is giving a 13-digit
    hexadecimal string (8 digits UNIX time and 5 digits microseconds). The HRM labels
    its result files by appending an underscore (`_`) followed by this ID and an
    `_hrm` suffix. This function tries to match this section and remove everything
    *after* it from the name.

    Its intention is to safely remove the suffix from an image file name while making
    no assumptions about what the suffix looks like (could e.g. be `.ics`, `.ome.tif`
    or similar).

    Parameters
    ----------
    fname : str
        The input string, usually the name of an HRM result file (but any string is
        accepted).

    Returns
    -------
    str
        The input string (`fname`) where everything *after* an HRM-like job label (e.g.
        `_abcdef0123456_hrm` or `_f435a27b9c85e_hrm`) is removed. In case the input
        string does *not* contain a matching section it is returned unchanged.
    """
    log.trace(fname)
    # job label (kept via the group) followed by a dot and the suffix to drop:
    label_with_suffix = re.compile(r"(_[0-9a-f]{13}_hrm)\..*")
    stripped = label_with_suffix.sub(r"\1", fname)
    log.trace(stripped)
    return stripped


def parse_summary(fname):
    """Parse the job parameter summary generated by HRM into a dict.

    Parse the HTML file generated by the HRM containing the parameter summary and
    generate a nested dict from it. The HTML file is assumed to contain three `<table>`
    items that contain a single `<td class="header">` item with the title and a `<tr>`
    section with four `<td>` items per parameter (being *parameter-name*, *channel*,
    *source* and *value*), e.g. something of this form:

    ```
    _____________________________________________
    |___________________title___________________|
    |_________________(ignored)_________________|
    | parameter-name | channel | source | value |
    ...
    | parameter-name | channel | source | value |
    ---------------------------------------------
    ```

    Parameters
    ----------
    fname : str
        The filename of the job's HTML parameter summary or (e.g.) the resulting image
        file. In case `fname` doesn't end in the common parameter summary suffix (for
        example if the image file name was provided), the function tries to derive the
        name of the summary file and use that one for parsing.

    Returns
    -------
    dict(dict) or None
        A dict with the parsed section names (table titles) being the keys, each
        containing another dict with the parameter names as keys (including the channel
        unless the parameter is channel-independent). See the example below. `None` is
        returned in case the file can't be opened or parsed.

    Raises
    ------
    KeyError
        Raised in case a section header or a parameter key (within one section) shows
        up more than once.

    Example
    -------
    >>> parse_summary('image_001.parameters.txt')
    ... {
    ...     "Image Parameters": {
    ...         "Emission wavelength (nm) [ch:0]": "567.000",
    ...         "Excitation wavelength (nm) [ch:0]": "456.000",
    ...         "Lens refractive index [ch:0]": "4.567",
    ...         "Microscope type [ch:0]": "widefield",
    ...         "Numerical aperture [ch:0]": "2.345",
    ...         "Point Spread Function": "theoretical",
    ...         "Sample refractive index [ch:0]": "3.456",
    ...         "Time interval (s)": "1.000000",
    ...         "X pixel size (μm)": "0.123456",
    ...         "Y pixel size (μm)": "0.123456",
    ...         "Z step size (μm)": "0.234567",
    ...     },
    ...     "Restoration Parameters": {
    ...         "Autocrop": "no",
    ...         "Background estimation": "auto",
    ...         "Deconvolution algorithm": "iiff",
    ...         "Number of iterations": "42",
    ...         "Quality stop criterion": "0.000007",
    ...         "Signal/Noise ratio [ch:0]": "99",
    ...     },
    ... }
    """
    # In case `fname` doesn't end with the common suffix for job summary files check if
    # it is the actual *image* filename of an HRM job and try to use the corresponding
    # parameter summary file instead:
    suffix = ".parameters.txt"
    if not fname.endswith(suffix):
        candidate = parse_job_basename(fname) + suffix
        if os.path.exists(candidate):
            log.debug(f"Found [{candidate}], will use it instead of [{fname}].")
            fname = candidate

    log.debug(f"Trying to parse job parameter summary file [{fname}]...")

    try:
        with open(fname, "r", encoding="utf-8") as soupfile:
            soup = BeautifulSoup(soupfile, features="html.parser")
            log.trace(f"BeautifulSoup successfully parsed [{fname}].")
    except IOError as err:
        log.error(f"Unable to open parameter summary file [{fname}]: {err}")
        return None
    except Exception as err:  # pragma: no cover  # pylint: disable-msg=broad-except
        log.error(f"Parsing summary file [{fname}] failed: {err}")
        return None

    sections = {}  # job parameter summaries have multiple sections split by headers
    processed = 0  # number of parameter rows parsed over *all* tables
    for table in soup.find_all("table"):
        log.trace("Parsing table header...")
        rows = table.find_all("tr")
        try:
            header = rows[0].find_all("td", class_="header")[0].text
        except IndexError:
            # either the table has no rows or its first row has no header cell
            log.debug("Skipping table entry that doesn't have a header.")
            continue
        log.trace(f"Parsed table header: {header}")
        if header in sections:
            raise KeyError(f"Error parsing parameters, duplicate header: {header}")

        pairs = {}
        # and the table body, starting from the 3rd <tr> item:
        for row in rows[2:]:
            cols = row.find_all("td")
            # parse the parameter "name":
            param_key = cols[0].text
            log.trace(f"Parsed (raw) key name: {param_key}")
            # replace HTML-encoded chars:
            # NOTE(review): the search string is shown as a literal character in this
            # listing, it may be an HTML entity in the upstream source - confirm.
            param_key = param_key.replace("μm", "µm")

            # parse the channel and add it to the key-string (unless it's "All"):
            channel = cols[1].text
            if channel != "All":
                param_key += f" [ch:{channel}]"

            # parse the parameter value:
            param_value = cols[3].text

            # finally add a new entry to the dict unless the key already exists:
            if param_key in pairs:
                raise KeyError(f"Parsing failed, duplicate parameter: {param_key}")
            pairs[param_key] = param_value
            processed += 1
        sections[header] = pairs

    log.success(f"Processed {processed} table rows.")
    return sections
def parse_config(filename):
    """Assemble a dict from the HRM config file (shell syntax).

    Usually, the config is located at /etc/hrm.conf and written in shell syntax as
    this file simply gets sourced by the bash init script and other shell based
    tools.

    Parameters
    ----------
    filename : str
        The name of the configuration file to be parsed.

    Returns
    -------
    dict
        A dict with the parsed configuration items.

    Raises
    ------
    SyntaxError
        Raised in case the given configuration file can't be parsed correctly.

    Example
    -------
    >>> parse_config('/etc/hrm.conf')
    ... {
    ...     'HRM_DATA': '/export/hrm_data',
    ...     'HRM_DEST': 'dst',
    ...     'HRM_HOME': '/var/www/hrm',
    ...     'HRM_LOG': '/var/log/hrm',
    ...     'HRM_SOURCE': 'src',
    ...     'OMERO_HOSTNAME': 'omero.mynetwork.xy',
    ...     'OMERO_PKG': '/opt/OMERO/OMERO.server',
    ...     'OMERO_PORT': '4064',
    ...     'PHP_CLI': '/usr/local/php/bin/php',
    ...     'SUSER': 'hrm'
    ... }
    """
    log.debug(f"Trying to parse HRM configuration file [{filename}]...")
    config = {}
    with open(filename, "r", encoding="utf-8") as file:
        body = file.read()

    lexer = shlex.shlex(body)
    # allow dashes, dots and slashes inside unquoted tokens (paths, hostnames):
    lexer.wordchars += "-./"
    while True:
        token = lexer.get_token()
        if not token:  # the lexer signals the end of the input
            break
        # it's valid sh syntax to use a semicolon to join lines, so accept it:
        if token == ";":
            continue
        # we assume entries of the following form:
        # KEY="some-value"
        key = token
        try:
            equals = lexer.get_token()
        except ValueError as err:
            # the lexer itself gave up (e.g. on an unterminated quotation)
            raise SyntaxError(
                f"Can't parse {filename}, invalid syntax in line {lexer.lineno} "
                f"({err})."
            ) from err
        # explicit check instead of `assert`, which is stripped by `python -O`:
        if equals != "=":
            raise SyntaxError(
                f"Can't parse {filename}, invalid syntax in line {lexer.lineno} "
                f"(expected '=', found '{equals}')."
            )
        value = lexer.get_token()
        value = value.replace('"', "")  # remove double quotes
        value = value.replace("'", "")  # remove single quotes
        config[key] = value
    log.info("Successfully parsed [{}].", filename)
    return config
Assemble a dict from the HRM config file (shell syntax).
Usually, the config is located at /etc/hrm.conf and written in shell syntax as this file simply gets sourced by the bash init script and other shell based tools.
Parameters
- filename (str): The name of the configuration file to be parsed.
Returns
- dict: A dict with the parsed configuration items.
Raises
- SyntaxError: Raised in case the given configuration file can't be parsed correctly.
Example
>>> parse_config('/etc/hrm.conf')
... {
... 'HRM_DATA': '/export/hrm_data',
... 'HRM_DEST': 'dst',
... 'HRM_HOME': '/var/www/hrm',
... 'HRM_LOG': '/var/log/hrm',
... 'HRM_SOURCE': 'src',
... 'OMERO_HOSTNAME': 'omero.mynetwork.xy',
... 'OMERO_PKG': '/opt/OMERO/OMERO.server',
... 'OMERO_PORT': '4064',
... 'PHP_CLI': '/usr/local/php/bin/php',
... 'SUSER': 'hrm'
... }
def check_config(config):
    """Check the config dict for required entries.

    Parameters
    ----------
    config : dict
        A dict with a parsed configuration, as returned by `parse_config()`.

    Raises
    ------
    SyntaxError
        Raised in case one of the required configuration items is missing.
    """
    required = ["OMERO_PKG", "OMERO_HOSTNAME"]
    for entry in required:
        if entry not in config:
            raise SyntaxError(f"Missing '{entry}' in the HRM config file.")
    log.debug("HRM config file passed all checks.")
Check the config dict for required entries.
Parameters
- config (dict): A dict with a parsed configuration, as returned by parse_config().
Raises
- SyntaxError: Raised in case one of the required configuration items is missing.
def job_parameter_summary(fname):
    """Generate a parameter summary text from the HRM-generated HTML file.

    Call the parser for the HTML file generated by the HRM containing the parameter
    summary and generate a plain-text version from the parsed results.

    Parameters
    ----------
    fname : str
        The filename of the job's HTML parameter summary.

    Returns
    -------
    str or None
        The formatted string containing the parameter summary, or `None` in case
        `parse_summary()` returned `None`.
    """
    parsed = parse_summary(fname)
    if parsed is None:
        return None

    # collect all fragments first and join them once at the end:
    fragments = []
    for section, parameters in parsed.items():
        fragments.append(f"{section}\n==============================\n")
        fragments.extend(f"{name}: {value}\n" for name, value in parameters.items())
    summary = "".join(fragments)
    log.debug(f"Job parameter summary:\n---\n{summary}---")
    log.success("Generated parameter summary.")
    return summary
Generate a parameter summary text from the HRM-generated HTML file.
Call the parser for the HTML file generated by the HRM containing the parameter summary and generate a plain-text version from the parsed results.
Parameters
- fname (str): The filename of the job's HTML parameter summary.
Returns
- str: The formatted string containing the parameter summary.
def parse_job_basename(fname):
    """Parse the basename from an HRM job result file name.

    HRM job IDs are generated via PHP's `uniqid()` call that is giving a 13-digit
    hexadecimal string (8 digits UNIX time and 5 digits microseconds). The HRM labels
    its result files by appending an underscore (`_`) followed by this ID and an
    `_hrm` suffix. This function tries to match this section and remove everything
    *after* it from the name.

    Its intention is to safely remove the suffix from an image file name while making
    no assumptions about what the suffix looks like (could e.g. be `.ics`, `.ome.tif`
    or similar).

    Parameters
    ----------
    fname : str
        The input string, usually the name of an HRM result file (but any string is
        accepted).

    Returns
    -------
    str
        The input string (`fname`) where everything *after* an HRM-like job label (e.g.
        `_abcdef0123456_hrm` or `_f435a27b9c85e_hrm`) is removed. In case the input
        string does *not* contain a matching section it is returned unchanged.
    """
    log.trace(fname)
    # job label (kept via the group) followed by a dot and the suffix to drop:
    label_with_suffix = re.compile(r"(_[0-9a-f]{13}_hrm)\..*")
    stripped = label_with_suffix.sub(r"\1", fname)
    log.trace(stripped)
    return stripped
Parse the basename from an HRM job result file name.
HRM job IDs are generated via PHP's uniqid() call, which returns a 13-digit hexadecimal string (8 digits UNIX time and 5 digits microseconds). The HRM labels its result files by appending an underscore (_) followed by this ID and an _hrm suffix. This function tries to match this section and remove everything after it from the name.
Its intention is to safely remove the suffix from an image file name while making no assumptions about what the suffix looks like (it could e.g. be .ics, .ome.tif or similar).
Parameters
- fname (str): The input string, usually the name of an HRM result file (but any string is accepted).
Returns
- str: The input string (fname) where everything after an HRM-like job label (e.g. _abcdef0123456_hrm or _f435a27b9c85e_hrm) is removed. In case the input string does not contain a matching section it is returned unchanged.
def parse_summary(fname):
    """Parse the job parameter summary generated by HRM into a dict.

    Parse the HTML file generated by the HRM containing the parameter summary and
    generate a nested dict from it. The HTML file is assumed to contain three `<table>`
    items that contain a single `<td class="header">` item with the title and a `<tr>`
    section with four `<td>` items per parameter (being *parameter-name*, *channel*,
    *source* and *value*), e.g. something of this form:

    ```
    _____________________________________________
    |___________________title___________________|
    |_________________(ignored)_________________|
    | parameter-name | channel | source | value |
    ...
    | parameter-name | channel | source | value |
    ---------------------------------------------
    ```

    Parameters
    ----------
    fname : str
        The filename of the job's HTML parameter summary or (e.g.) the resulting image
        file. In case `fname` doesn't end in the common parameter summary suffix (for
        example if the image file name was provided), the function tries to derive the
        name of the summary file and use that one for parsing.

    Returns
    -------
    dict(dict) or None
        A dict with the parsed section names (table titles) being the keys, each
        containing another dict with the parameter names as keys (including the channel
        unless the parameter is channel-independent). See the example below. `None` is
        returned in case the file can't be opened or parsed.

    Raises
    ------
    KeyError
        Raised in case a section header or a parameter key (within one section) shows
        up more than once.

    Example
    -------
    >>> parse_summary('image_001.parameters.txt')
    ... {
    ...     "Image Parameters": {
    ...         "Emission wavelength (nm) [ch:0]": "567.000",
    ...         "Excitation wavelength (nm) [ch:0]": "456.000",
    ...         "Lens refractive index [ch:0]": "4.567",
    ...         "Microscope type [ch:0]": "widefield",
    ...         "Numerical aperture [ch:0]": "2.345",
    ...         "Point Spread Function": "theoretical",
    ...         "Sample refractive index [ch:0]": "3.456",
    ...         "Time interval (s)": "1.000000",
    ...         "X pixel size (μm)": "0.123456",
    ...         "Y pixel size (μm)": "0.123456",
    ...         "Z step size (μm)": "0.234567",
    ...     },
    ...     "Restoration Parameters": {
    ...         "Autocrop": "no",
    ...         "Background estimation": "auto",
    ...         "Deconvolution algorithm": "iiff",
    ...         "Number of iterations": "42",
    ...         "Quality stop criterion": "0.000007",
    ...         "Signal/Noise ratio [ch:0]": "99",
    ...     },
    ... }
    """
    # In case `fname` doesn't end with the common suffix for job summary files check if
    # it is the actual *image* filename of an HRM job and try to use the corresponding
    # parameter summary file instead:
    suffix = ".parameters.txt"
    if not fname.endswith(suffix):
        candidate = parse_job_basename(fname) + suffix
        if os.path.exists(candidate):
            log.debug(f"Found [{candidate}], will use it instead of [{fname}].")
            fname = candidate

    log.debug(f"Trying to parse job parameter summary file [{fname}]...")

    try:
        with open(fname, "r", encoding="utf-8") as soupfile:
            soup = BeautifulSoup(soupfile, features="html.parser")
            log.trace(f"BeautifulSoup successfully parsed [{fname}].")
    except IOError as err:
        log.error(f"Unable to open parameter summary file [{fname}]: {err}")
        return None
    except Exception as err:  # pragma: no cover  # pylint: disable-msg=broad-except
        log.error(f"Parsing summary file [{fname}] failed: {err}")
        return None

    sections = {}  # job parameter summaries have multiple sections split by headers
    processed = 0  # number of parameter rows parsed over *all* tables
    for table in soup.find_all("table"):
        log.trace("Parsing table header...")
        rows = table.find_all("tr")
        try:
            header = rows[0].find_all("td", class_="header")[0].text
        except IndexError:
            # either the table has no rows or its first row has no header cell
            log.debug("Skipping table entry that doesn't have a header.")
            continue
        log.trace(f"Parsed table header: {header}")
        if header in sections:
            raise KeyError(f"Error parsing parameters, duplicate header: {header}")

        pairs = {}
        # and the table body, starting from the 3rd <tr> item:
        for row in rows[2:]:
            cols = row.find_all("td")
            # parse the parameter "name":
            param_key = cols[0].text
            log.trace(f"Parsed (raw) key name: {param_key}")
            # replace HTML-encoded chars:
            # NOTE(review): the search string is shown as a literal character in this
            # listing, it may be an HTML entity in the upstream source - confirm.
            param_key = param_key.replace("μm", "µm")

            # parse the channel and add it to the key-string (unless it's "All"):
            channel = cols[1].text
            if channel != "All":
                param_key += f" [ch:{channel}]"

            # parse the parameter value:
            param_value = cols[3].text

            # finally add a new entry to the dict unless the key already exists:
            if param_key in pairs:
                raise KeyError(f"Parsing failed, duplicate parameter: {param_key}")
            pairs[param_key] = param_value
            processed += 1
        sections[header] = pairs

    log.success(f"Processed {processed} table rows.")
    return sections
Parse the job parameter summary generated by HRM into a dict.
Parse the HTML file generated by the HRM containing the parameter summary and generate a nested dict from it. The HTML file is assumed to contain three <table> items that contain a single <td class="header"> item with the title and a <tr> section with four <td> items per parameter (being parameter-name, channel, source and value), e.g. something of this form:
_____________________________________________
|___________________title___________________|
|_________________(ignored)_________________|
| parameter-name | channel | source | value |
...
| parameter-name | channel | source | value |
---------------------------------------------
Parameters
- fname (str): The filename of the job's HTML parameter summary or (e.g.) the resulting image file. In case fname doesn't end in the common parameter summary suffix (for example if the image file name was provided), the function tries to derive the name of the summary file and use that one for parsing.
Returns
- dict(dict): A dict with the parsed section names (table titles) being the keys, each containing another dict with the parameter names as keys (including the channel unless the parameter is channel-independent). See the example below.
Example
>>> parse_summary('image_001.parameters.txt')
... {
... "Image Parameters": {
... "Emission wavelength (nm) [ch:0]": "567.000",
... "Excitation wavelength (nm) [ch:0]": "456.000",
... "Lens refractive index [ch:0]": "4.567",
... "Microscope type [ch:0]": "widefield",
... "Numerical aperture [ch:0]": "2.345",
... "Point Spread Function": "theoretical",
... "Sample refractive index [ch:0]": "3.456",
... "Time interval (s)": "1.000000",
... "X pixel size (μm)": "0.123456",
... "Y pixel size (μm)": "0.123456",
... "Z step size (μm)": "0.234567",
... },
... "Restoration Parameters": {
... "Autocrop": "no",
... "Background estimation": "auto",
... "Deconvolution algorithm": "iiff",
... "Number of iterations": "42",
... "Quality stop criterion": "0.000007",
... "Signal/Noise ratio [ch:0]": "99",
... },
... }