Coverage for peakipy/io.py: 92%
486 statements
coverage.py v7.4.4, created at 2024-09-14 14:49 -0400
import sys
from pathlib import Path
from enum import Enum

import numpy as np
import nmrglue as ng
import pandas as pd
import textwrap
from rich import print
from rich.console import Console

from bokeh.palettes import Category20
from scipy import ndimage
from skimage.morphology import square, binary_closing, disk, rectangle
from skimage.filters import threshold_otsu
from pydantic import BaseModel

from peakipy.utils import df_to_rich_table
from peakipy.fitting import make_mask

console = Console()

class StrucEl(str, Enum):
    square = "square"
    disk = "disk"
    rectangle = "rectangle"
    mask_method = "mask_method"


class PeaklistFormat(str, Enum):
    a2 = "a2"
    a3 = "a3"
    sparky = "sparky"
    pipe = "pipe"
    peakipy = "peakipy"


class OutFmt(str, Enum):
    csv = "csv"
    pkl = "pkl"

class PeaklistColumns(BaseModel):
    """These are the columns required for performing fits in peakipy"""

    INDEX: int
    X_AXIS: int
    Y_AXIS: int
    X_AXISf: float
    Y_AXISf: float
    X_PPM: float
    Y_PPM: float
    XW: float
    YW: float
    XW_HZ: float
    YW_HZ: float
    HEIGHT: float
    VOL: float
    ASS: str
    X_RADIUS: float
    Y_RADIUS: float
    X_RADIUS_PPM: float
    Y_RADIUS_PPM: float
    include: str


class PeaklistColumnsWithClusters(PeaklistColumns):
    CLUSTID: int
    MEMCNT: int
    color: str
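
# A minimal sketch of the per-row validation that validate_peaklist() performs
# further down (field values here are illustrative, not from a real spectrum):
#     record = dict(INDEX=1, X_AXIS=50, Y_AXIS=40, X_AXISf=50.2, Y_AXISf=40.7,
#                   X_PPM=8.5, Y_PPM=120.1, XW=3.1, YW=2.9, XW_HZ=20.0,
#                   YW_HZ=20.0, HEIGHT=1e5, VOL=1e6, ASS="A1N-H", X_RADIUS=6.1,
#                   Y_RADIUS=4.4, X_RADIUS_PPM=0.04, Y_RADIUS_PPM=0.4,
#                   include="yes")
#     validated = PeaklistColumns(**record).model_dump()  # coerces types, raises on missing fields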

class Pseudo3D:
    """Read dic, data from NMRGlue and dims from input to create a Pseudo3D dataset

    :param dic: from nmrglue.pipe.read
    :type dic: dict

    :param data: data from nmrglue.pipe.read
    :type data: numpy.array

    :param dims: dimension order i.e. [0, 1, 2] where 0 = planes, 1 = f1, 2 = f2
    :type dims: list
    """

    def __init__(self, dic, data, dims):
        # check dimensions
        self._udic = ng.pipe.guess_udic(dic, data)
        self._ndim = self._udic["ndim"]

        if self._ndim == 1:
            err = """[red]
            ##########################################
                NMR Data should be either 2D or 3D
            ##########################################
            [/red]"""
            # raise TypeError(err)
            sys.exit(err)

        # check that spectrum has correct number of dims
        elif self._ndim != len(dims):
            err = f"""[red]
            #################################################################
               Your spectrum has {self._ndim} dimensions with shape {data.shape}
               but you have given a dimension order of {dims}...
            #################################################################
            [/red]"""
            # raise ValueError(err)
            sys.exit(err)

        elif (self._ndim == 2) and (len(dims) == 2):
            self._f1_dim, self._f2_dim = dims
            self._planes = 0
            self._uc_f1 = ng.pipe.make_uc(dic, data, dim=self._f1_dim)
            self._uc_f2 = ng.pipe.make_uc(dic, data, dim=self._f2_dim)
            # make data pseudo3d
            self._data = data.reshape((1, data.shape[0], data.shape[1]))
            self._dims = [self._planes, self._f1_dim + 1, self._f2_dim + 1]

        else:
            self._planes, self._f1_dim, self._f2_dim = dims
            self._dims = dims
            self._data = data
            # make unit conversion dicts
            self._uc_f2 = ng.pipe.make_uc(dic, data, dim=self._f2_dim)
            self._uc_f1 = ng.pipe.make_uc(dic, data, dim=self._f1_dim)
            # rearrange data if dims not in standard order
            if self._dims != [0, 1, 2]:
                # np.argsort returns indices of array for order 0,1,2 to transpose data correctly
                # self._dims = np.argsort(self._dims)
                self._data = np.transpose(data, self._dims)

        self._dic = dic

        self._f1_label = self._udic[self._f1_dim]["label"]
        self._f2_label = self._udic[self._f2_dim]["label"]

    @property
    def uc_f1(self):
        """Return unit conversion dict for F1"""
        return self._uc_f1

    @property
    def uc_f2(self):
        """Return unit conversion dict for F2"""
        return self._uc_f2

    @property
    def dims(self):
        """Return dimension order"""
        return self._dims

    @property
    def data(self):
        """Return array containing data"""
        return self._data

    @data.setter
    def data(self, data):
        self._data = data

    @property
    def dic(self):
        return self._dic

    @property
    def udic(self):
        return self._udic

    @property
    def ndim(self):
        return self._ndim

    @property
    def f1_label(self):
        # dim label
        return self._f1_label

    @property
    def f2_label(self):
        # dim label
        return self._f2_label

    @property
    def planes(self):
        return self.dims[0]

    @property
    def n_planes(self):
        return self.data.shape[self.planes]

    @property
    def f1(self):
        return self.dims[1]

    @property
    def f2(self):
        return self.dims[2]

    # size of f1 and f2 in points
    @property
    def f2_size(self):
        """Return size of f2 dimension in points"""
        return self._udic[self._f2_dim]["size"]

    @property
    def f1_size(self):
        """Return size of f1 dimension in points"""
        return self._udic[self._f1_dim]["size"]

    # points per ppm
    @property
    def pt_per_ppm_f1(self):
        return self.f1_size / (
            self._udic[self._f1_dim]["sw"] / self._udic[self._f1_dim]["obs"]
        )

    @property
    def pt_per_ppm_f2(self):
        return self.f2_size / (
            self._udic[self._f2_dim]["sw"] / self._udic[self._f2_dim]["obs"]
        )
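
    # Worked example (illustrative numbers, not from a real dataset): a
    # 2048-point f2 dimension with sw = 8000.0 Hz and obs = 600.0 MHz spans
    # 8000 / 600 ≈ 13.33 ppm, so pt_per_ppm_f2 = 2048 / 13.33 ≈ 153.6 points
    # per ppm and pt_per_hz_f2 = 2048 / 8000 = 0.256 points per Hz.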

    # points per hz
    @property
    def pt_per_hz_f1(self):
        return self.f1_size / self._udic[self._f1_dim]["sw"]

    @property
    def pt_per_hz_f2(self):
        return self.f2_size / self._udic[self._f2_dim]["sw"]

    # hz per point
    @property
    def hz_per_pt_f1(self):
        return 1.0 / self.pt_per_hz_f1

    @property
    def hz_per_pt_f2(self):
        return 1.0 / self.pt_per_hz_f2

    # ppm per point
    @property
    def ppm_per_pt_f1(self):
        return 1.0 / self.pt_per_ppm_f1

    @property
    def ppm_per_pt_f2(self):
        return 1.0 / self.pt_per_ppm_f2

    # get ppm limits for ppm scales
    @property
    def f2_ppm_scale(self):
        return self.uc_f2.ppm_scale()

    @property
    def f1_ppm_scale(self):
        return self.uc_f1.ppm_scale()

    @property
    def f2_ppm_limits(self):
        return self.uc_f2.ppm_limits()

    @property
    def f1_ppm_limits(self):
        return self.uc_f1.ppm_limits()

    @property
    def f1_ppm_max(self):
        return max(self.f1_ppm_limits)

    @property
    def f1_ppm_min(self):
        return min(self.f1_ppm_limits)

    @property
    def f2_ppm_max(self):
        return max(self.f2_ppm_limits)

    @property
    def f2_ppm_min(self):
        return min(self.f2_ppm_limits)

    @property
    def f2_ppm_0(self):
        return self.f2_ppm_limits[0]

    @property
    def f2_ppm_1(self):
        return self.f2_ppm_limits[1]

    @property
    def f1_ppm_0(self):
        return self.f1_ppm_limits[0]

    @property
    def f1_ppm_1(self):
        return self.f1_ppm_limits[1]
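
# Hedged usage sketch ("test.ft2" is a hypothetical pseudo-3D NMRPipe file;
# dims gives the order [planes, f1, f2], or [f1, f2] for plain 2D data):
#     dic, data = ng.pipe.read("test.ft2")
#     spec = Pseudo3D(dic, data, dims=[0, 1, 2])
#     print(spec.n_planes, spec.f1_label, spec.f2_label, spec.f2_ppm_limits)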

class UnknownFormat(Exception):
    pass

class Peaklist(Pseudo3D):
    """Read an Analysis, Sparky, or NMRPipe peak list, convert it to an NMRPipe-ish format, and find peak clusters

    Parameters
    ----------
    path : path-like or str
        path to peaklist
    data_path : path-like or str
        path to NMRPipe format data
    fmt : str
        a2|a3|sparky|pipe
    dims : list
        [planes, y, x]
    radii : list
        [x, y] mask radii in ppm

    Methods
    -------
    clusters :
    mask_method :

    Returns
    -------
    df : pandas DataFrame
        dataframe containing peaklist

    """

    def __init__(
        self,
        path,
        data_path,
        fmt: PeaklistFormat = PeaklistFormat.a2,
        dims=[0, 1, 2],
        radii=[0.04, 0.4],
        posF1="Position F2",
        posF2="Position F1",
        verbose=False,
    ):
        dic, data = ng.pipe.read(data_path)
        Pseudo3D.__init__(self, dic, data, dims)
        self.fmt = fmt
        self.peaklist_path = path
        self.data_path = data_path
        self.verbose = verbose
        self._radii = radii
        self._thres = None
        if self.verbose:
            print(
                "Points per hz f1 = %.3f, f2 = %.3f"
                % (self.pt_per_hz_f1, self.pt_per_hz_f2)
            )

        self._analysis_to_pipe_dic = {
            "#": "INDEX",
            "Position F1": "X_PPM",
            "Position F2": "Y_PPM",
            "Line Width F1 (Hz)": "XW_HZ",
            "Line Width F2 (Hz)": "YW_HZ",
            "Height": "HEIGHT",
            "Volume": "VOL",
        }
        self._assign_to_pipe_dic = {
            "#": "INDEX",
            "Pos F1": "X_PPM",
            "Pos F2": "Y_PPM",
            "LW F1 (Hz)": "XW_HZ",
            "LW F2 (Hz)": "YW_HZ",
            "Height": "HEIGHT",
            "Volume": "VOL",
        }
        self._sparky_to_pipe_dic = {
            "index": "INDEX",
            "w1": "X_PPM",
            "w2": "Y_PPM",
            "lw1 (hz)": "XW_HZ",
            "lw2 (hz)": "YW_HZ",
            "Height": "HEIGHT",
            "Volume": "VOL",
            "Assignment": "ASS",
        }

        self._analysis_to_pipe_dic[posF1] = "Y_PPM"
        self._analysis_to_pipe_dic[posF2] = "X_PPM"

        self._df = self.read_peaklist()

    def read_peaklist(self):
        match self.fmt:
            case PeaklistFormat.a2:
                self._df = self._read_analysis()

            case PeaklistFormat.a3:
                self._df = self._read_assign()

            case PeaklistFormat.sparky:
                self._df = self._read_sparky()

            case PeaklistFormat.pipe:
                self._df = self._read_pipe()

            case _:
                raise UnknownFormat(f"I don't know this format: {self.fmt}")

        return self._df

    @property
    def df(self):
        return self._df

    @df.setter
    def df(self, df):
        self._df = df

    @property
    def radii(self):
        return self._radii

    @property
    def f2_radius(self):
        """radius for fitting mask in f2"""
        return self.radii[0]

    @property
    def f1_radius(self):
        """radius for fitting mask in f1"""
        return self.radii[1]

    @property
    def analysis_to_pipe_dic(self):
        return self._analysis_to_pipe_dic

    @property
    def assign_to_pipe_dic(self):
        return self._assign_to_pipe_dic

    @property
    def sparky_to_pipe_dic(self):
        return self._sparky_to_pipe_dic

    @property
    def thres(self):
        if self._thres is None:
            self._thres = abs(threshold_otsu(self.data[0]))
        return self._thres
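
    # A minimal standalone sketch of what thres computes (spec is a
    # hypothetical Peaklist/Pseudo3D instance): Otsu's method picks the
    # intensity cutoff that best separates the first plane's histogram into
    # two populations, and abs() makes it usable for negative signals too.
    #     t = abs(threshold_otsu(spec.data[0]))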

    def validate_peaklist(self):
        self.df = pd.DataFrame(
            [
                PeaklistColumns(**i).model_dump()
                for i in self.df.to_dict(orient="records")
            ]
        )
        return self.df

    def update_df(self):
        # int point value
        self.df["X_AXIS"] = self.df.X_PPM.apply(lambda x: self.uc_f2(x, "ppm"))
        self.df["Y_AXIS"] = self.df.Y_PPM.apply(lambda x: self.uc_f1(x, "ppm"))
        # decimal point value
        self.df["X_AXISf"] = self.df.X_PPM.apply(lambda x: self.uc_f2.f(x, "ppm"))
        self.df["Y_AXISf"] = self.df.Y_PPM.apply(lambda x: self.uc_f1.f(x, "ppm"))
        # in case of missing values (should estimate though)
        self.df["XW_HZ"] = self.df.XW_HZ.replace("None", "20.0")
        self.df["YW_HZ"] = self.df.YW_HZ.replace("None", "20.0")
        self.df["XW_HZ"] = self.df.XW_HZ.replace(np.nan, "20.0")
        self.df["YW_HZ"] = self.df.YW_HZ.replace(np.nan, "20.0")
        # convert linewidths to float
        self.df["XW_HZ"] = self.df.XW_HZ.apply(lambda x: float(x))
        self.df["YW_HZ"] = self.df.YW_HZ.apply(lambda x: float(x))
        # convert Hz lw to points
        self.df["XW"] = self.df.XW_HZ.apply(lambda x: x * self.pt_per_hz_f2)
        self.df["YW"] = self.df.YW_HZ.apply(lambda x: x * self.pt_per_hz_f1)
        # makes an assignment column from Assign F1 and Assign F2 columns
        # in analysis2.x and ccpnmr v3 assign peak lists
        if self.fmt in [PeaklistFormat.a2, PeaklistFormat.a3]:
            self.df["ASS"] = self.df.apply(
                # lambda i: "".join([i["Assign F1"], i["Assign F2"]]), axis=1
                lambda i: f"{i['Assign F1']}_{i['Assign F2']}",
                axis=1,
            )

        # make default values for X and Y radii for fit masks
        self.df["X_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f2_radius
        self.df["Y_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f1_radius
        self.df["X_RADIUS"] = self.df.X_RADIUS_PPM.apply(
            lambda x: x * self.pt_per_ppm_f2
        )
        self.df["Y_RADIUS"] = self.df.Y_RADIUS_PPM.apply(
            lambda x: x * self.pt_per_ppm_f1
        )
        # add include column if it doesn't exist
        if "include" not in self.df.columns:
            self.df["include"] = self.df.apply(lambda x: "yes", axis=1)

        # check assignments for duplicates
        self.check_assignments()
        # check that peaks are within the bounds of the data
        self.check_peak_bounds()
        self.validate_peaklist()
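
    # Hedged sketch of the unit conversions above for a single peak at 8.5 ppm
    # in f2 (uc_f2 is an nmrglue unit-conversion object from make_uc):
    #     pt = self.uc_f2(8.5, "ppm")     # nearest integer point index
    #     ptf = self.uc_f2.f(8.5, "ppm")  # fractional point position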

    def add_fix_bound_columns(self):
        """add columns containing parameter bounds (param_upper/param_lower)
        and whether or not parameter should be fixed (yes/no)

        For parameter bounding:

            Column names are <param_name>_upper and <param_name>_lower for upper and lower bounds respectively.
            Values are given as floating point. Value of 0.0 indicates that parameter is unbounded.
            X/Y positions are given in ppm.
            Linewidths are given in Hz.

        For parameter fixing:

            Column names are <param_name>_fix.
            Values are given as a string 'yes' or 'no'.

        """
        pass
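
    # Illustrative only (the parameter names below are hypothetical, following
    # the <param_name>_upper/_lower/_fix convention in the docstring above):
    #     center_x_lower=8.2, center_x_upper=8.6   # bound a position in ppm
    #     sigma_x_fix="yes"                        # hold a lineshape parameter fixed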

    def _read_analysis(self):
        df = pd.read_csv(self.peaklist_path, delimiter="\t")
        new_columns = [self.analysis_to_pipe_dic.get(i, i) for i in df.columns]
        pipe_columns = dict(zip(df.columns, new_columns))
        df = df.rename(index=str, columns=pipe_columns)

        return df

    def _read_assign(self):
        df = pd.read_csv(self.peaklist_path, delimiter="\t")
        new_columns = [self.assign_to_pipe_dic.get(i, i) for i in df.columns]
        pipe_columns = dict(zip(df.columns, new_columns))
        df = df.rename(index=str, columns=pipe_columns)

        return df

    def _read_sparky(self):
        df = pd.read_csv(
            self.peaklist_path,
            skiprows=1,
            sep=r"\s+",
            names=["ASS", "Y_PPM", "X_PPM"],
            # use only first three columns
            usecols=[i for i in range(3)],
        )
        df["INDEX"] = df.index
        # need to add LW estimate
        df["XW_HZ"] = 20.0
        df["YW_HZ"] = 20.0
        # dummy values
        df["HEIGHT"] = 0.0
        df["VOL"] = 0.0
        return df
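
    # Expected Sparky layout (illustrative; the header row is skipped via
    # skiprows=1 and only the first three columns are read):
    #     Assignment        w1        w2
    #     A1N-H         120.123     8.456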

    def _read_pipe(self):
        to_skip = 0
        columns = None
        with open(self.peaklist_path) as f:
            lines = f.readlines()
            for line in lines:
                if line.startswith("VARS"):
                    columns = line.strip().split()[1:]
                elif line[:5].strip(" ").isdigit():
                    break
                else:
                    to_skip += 1
        # guard against a peaklist with no VARS header line
        if columns is None:
            raise UnknownFormat(f"No VARS line found in {self.peaklist_path}")
        df = pd.read_csv(
            self.peaklist_path, skiprows=to_skip, names=columns, sep=r"\s+"
        )
        return df
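
    # Illustrative NMRPipe .tab fragment this parser expects (to_skip counts
    # header lines until the first row whose leading field is a digit):
    #     VARS   INDEX X_PPM Y_PPM XW_HZ YW_HZ HEIGHT VOL ASS
    #     FORMAT %5d ...
    #         1  8.456 120.123 20.0 20.0 1e5 1e6 A1N-H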

    def check_assignments(self):
        self.df["ASS"] = self.df.ASS.astype(object)
        self.df.loc[self.df["ASS"].isnull(), "ASS"] = "None_dummy_0"
        self.df["ASS"] = self.df.ASS.astype(str)
        duplicates_bool = self.df.ASS.duplicated()
        duplicates = self.df.ASS[duplicates_bool]
        if len(duplicates) > 0:
            console.print(
                textwrap.dedent(
                    """
                    #############################################################################
                    You have duplicated assignments in your list...
                    Currently each peak needs a unique assignment. Sorry about that buddy...
                    #############################################################################
                    """
                ),
                style="yellow",
            )
            self.df.loc[duplicates_bool, "ASS"] = [
                f"{i}_dummy_{num+1}" for num, i in enumerate(duplicates)
            ]
            if self.verbose:
                print("Here are the duplicates")
                print(duplicates)
                print(self.df.ASS)

            print(
                textwrap.dedent(
                    """
                    Creating dummy assignments for duplicates
                    """
                )
            )

    def check_peak_bounds(self):
        columns_to_print = ["INDEX", "ASS", "X_AXIS", "Y_AXIS", "X_PPM", "Y_PPM"]
        # check that peaks are within the bounds of spectrum
        within_x = (self.df.X_PPM < self.f2_ppm_max) & (self.df.X_PPM > self.f2_ppm_min)
        within_y = (self.df.Y_PPM < self.f1_ppm_max) & (self.df.Y_PPM > self.f1_ppm_min)
        self.excluded = self.df[~(within_x & within_y)]
        self.df = self.df[within_x & within_y]
        if len(self.excluded) > 0:
            print(
                textwrap.dedent(
                    f"""[red]
                    #################################################################################
                    Excluding the following peaks as they are not within the spectrum which has shape
                    {self.data.shape}
                    [/red]"""
                )
            )
            table_to_print = df_to_rich_table(
                self.excluded,
                title="Excluded",
                columns=columns_to_print,
                styles=["red" for i in columns_to_print],
            )
            print(table_to_print)
            print(
                "[red]#################################################################################[/red]"
            )

    def clusters(
        self,
        thres=None,
        struc_el: StrucEl = StrucEl.disk,
        struc_size=(3,),
        l_struc=None,
    ):
        """Find clusters of peaks

        :param thres: threshold for positive signals above which clusters are selected. If None then threshold_otsu is used
        :type thres: float

        :param struc_el: 'square'|'disk'|'rectangle'
            structuring element for binary_closing of thresholded data; can be square, disk or rectangle
        :type struc_el: StrucEl

        :param struc_size: size/dimensions of structuring element.
            For square and disk the first element of the tuple is used (for disk the value corresponds to the radius);
            for rectangle the tuple corresponds to (width, height).
        :type struc_size: tuple

        """
        peaks = [[y, x] for y, x in zip(self.df.Y_AXIS, self.df.X_AXIS)]

        if thres is None:
            thres = self.thres
            self._thres = abs(threshold_otsu(self.data[0]))
        else:
            self._thres = thres

        # get positive and negative
        thresh_data = np.bitwise_or(
            self.data[0] < (self._thres * -1.0), self.data[0] > self._thres
        )

        match struc_el:
            case StrucEl.disk:
                radius = struc_size[0]
                if self.verbose:
                    print(f"using disk with {radius}")
                closed_data = binary_closing(thresh_data, disk(int(radius)))

            case StrucEl.square:
                width = struc_size[0]
                if self.verbose:
                    print(f"using square with {width}")
                closed_data = binary_closing(thresh_data, square(int(width)))

            case StrucEl.rectangle:
                width, height = struc_size
                if self.verbose:
                    print(f"using rectangle with {width} and {height}")
                closed_data = binary_closing(
                    thresh_data, rectangle(int(width), int(height))
                )

            case _:
                if self.verbose:
                    print("Not using any closing function")
                closed_data = thresh_data

        labeled_array, num_features = ndimage.label(closed_data, l_struc)

        self.df.loc[:, "CLUSTID"] = [labeled_array[i[0], i[1]] for i in peaks]

        # renumber "0" clusters
        max_clustid = self.df["CLUSTID"].max()
        n_of_zeros = len(self.df[self.df["CLUSTID"] == 0]["CLUSTID"])
        self.df.loc[self.df[self.df["CLUSTID"] == 0].index, "CLUSTID"] = np.arange(
            max_clustid + 1, n_of_zeros + max_clustid + 1, dtype=int
        )

        # count how many peaks per cluster
        for ind, group in self.df.groupby("CLUSTID"):
            self.df.loc[group.index, "MEMCNT"] = len(group)

        self.df.loc[:, "color"] = self.df.apply(
            lambda x: Category20[20][int(x.CLUSTID) % 20] if x.MEMCNT > 1 else "black",
            axis=1,
        )
        return ClustersResult(labeled_array, num_features, closed_data, peaks)
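
    # Hedged usage sketch ("peaks.a2" and "test.ft2" are hypothetical paths):
    #     peaklist = Peaklist("peaks.a2", "test.ft2", fmt=PeaklistFormat.a2)
    #     peaklist.update_df()
    #     result = peaklist.clusters(struc_el=StrucEl.disk, struc_size=(3,))
    #     print(result.num_features, "clusters found")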

    def mask_method(self, overlap=1.0, l_struc=None):
        """connect clusters based on overlap of fitting masks

        :param overlap: fraction of mask for which overlaps are calculated
        :type overlap: float

        :returns: instance of ClustersResult
        :rtype: ClustersResult
        """
        # overlap is positive
        overlap = abs(overlap)

        self._thres = threshold_otsu(self.data[0])

        mask = np.zeros(self.data[0].shape, dtype=bool)

        for ind, peak in self.df.iterrows():
            mask += make_mask(
                self.data[0],
                peak.X_AXISf,
                peak.Y_AXISf,
                peak.X_RADIUS * overlap,
                peak.Y_RADIUS * overlap,
            )

        peaks = [[y, x] for y, x in zip(self.df.Y_AXIS, self.df.X_AXIS)]
        labeled_array, num_features = ndimage.label(mask, l_struc)

        self.df.loc[:, "CLUSTID"] = [labeled_array[i[0], i[1]] for i in peaks]

        # renumber "0" clusters
        max_clustid = self.df["CLUSTID"].max()
        n_of_zeros = len(self.df[self.df["CLUSTID"] == 0]["CLUSTID"])
        self.df.loc[self.df[self.df["CLUSTID"] == 0].index, "CLUSTID"] = np.arange(
            max_clustid + 1, n_of_zeros + max_clustid + 1, dtype=int
        )

        # count how many peaks per cluster
        for ind, group in self.df.groupby("CLUSTID"):
            self.df.loc[group.index, "MEMCNT"] = len(group)

        self.df.loc[:, "color"] = self.df.apply(
            lambda x: Category20[20][int(x.CLUSTID) % 20] if x.MEMCNT > 1 else "black",
            axis=1,
        )

        return ClustersResult(labeled_array, num_features, mask, peaks)
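
    # Hedged sketch: overlap < 1.0 shrinks each elliptical fitting mask before
    # testing for contact, so only strongly overlapping peaks join a cluster:
    #     result = peaklist.mask_method(overlap=0.9)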

    def to_fuda(self):
        fname = self.peaklist_path.parent / "params.fuda"
        with open(self.peaklist_path.parent / "peaks.fuda", "w") as peaks_fuda:
            for ass, f1_ppm, f2_ppm in zip(self.df.ASS, self.df.Y_PPM, self.df.X_PPM):
                peaks_fuda.write(f"{ass}\t{f1_ppm:.3f}\t{f2_ppm:.3f}\n")
        groups = self.df.groupby("CLUSTID")
        fuda_params = Path(fname)
        overlap_peaks = ""

        for ind, group in groups:
            if len(group) > 1:
                overlap_peaks_str = ";".join(group.ASS)
                overlap_peaks += f"OVERLAP_PEAKS=({overlap_peaks_str})\n"

        # template lines are kept at column 0 so that interpolated multi-line
        # values do not break textwrap.dedent
        fuda_file = textwrap.dedent(
            f"""\
# Read peaklist and spectrum info
PEAKLIST=peaks.fuda
SPECFILE={self.data_path}
PARAMETERFILE=(bruker;vclist)
ZCORR=ncyc
NOISE={self.thres} # you'll need to adjust this
BASELINE=N
VERBOSELEVEL=5
PRINTDATA=Y
LM=(MAXFEV=250;TOL=1e-5)
#Specify the default values. All values are in ppm:
DEF_LINEWIDTH_F1={self.f1_radius}
DEF_LINEWIDTH_F2={self.f2_radius}
DEF_RADIUS_F1={self.f1_radius}
DEF_RADIUS_F2={self.f2_radius}
SHAPE=GLORE
# OVERLAP PEAKS
{overlap_peaks}"""
        )
        with open(fuda_params, "w") as f:
            print(f"Writing FuDA file {fuda_file}")
            f.write(fuda_file)
        if self.verbose:
            print(overlap_peaks)

class ClustersResult:
    """Class to store results of clusters function"""

    def __init__(self, labeled_array, num_features, closed_data, peaks):
        self._labeled_array = labeled_array
        self._num_features = num_features
        self._closed_data = closed_data
        self._peaks = peaks

    @property
    def labeled_array(self):
        return self._labeled_array

    @property
    def num_features(self):
        return self._num_features

    @property
    def closed_data(self):
        return self._closed_data

    @property
    def peaks(self):
        return self._peaks

class LoadData(Peaklist):
    """Load peaklist data from a peakipy .csv file output by either peakipy read or edit

    read_peaklist is redefined to just read a .csv file

    check_data_frame makes sure the dataframe is in good shape for setting up fits

    """

    def read_peaklist(self):
        if self.peaklist_path.suffix == ".csv":
            self.df = pd.read_csv(self.peaklist_path)  # , comment="#")

        elif self.peaklist_path.suffix == ".tab":
            self.df = pd.read_csv(self.peaklist_path, sep="\t")  # comment="#")

        else:
            self.df = pd.read_pickle(self.peaklist_path)

        self._thres = threshold_otsu(self.data[0])

        return self.df

    def validate_peaklist(self):
        self.df = pd.DataFrame(
            [
                PeaklistColumnsWithClusters(**i).model_dump()
                for i in self.df.to_dict(orient="records")
            ]
        )
        return self.df

    def check_data_frame(self):
        # make diameter columns
        if "X_DIAMETER_PPM" not in self.df.columns:
            self.df["X_DIAMETER_PPM"] = self.df["X_RADIUS_PPM"] * 2.0
            self.df["Y_DIAMETER_PPM"] = self.df["Y_RADIUS_PPM"] * 2.0

        # make a column to track edited peaks
        if "Edited" not in self.df.columns:
            self.df["Edited"] = np.zeros(len(self.df), dtype=bool)

        # create include column if it doesn't exist
        if "include" not in self.df.columns:
            self.df["include"] = self.df.apply(lambda _: "yes", axis=1)

        # color clusters
        self.df["color"] = self.df.apply(
            lambda x: Category20[20][int(x.CLUSTID) % 20] if x.MEMCNT > 1 else "black",
            axis=1,
        )

        # get rid of unnamed columns
        unnamed_cols = [i for i in self.df.columns if "Unnamed:" in i]
        self.df = self.df.drop(columns=unnamed_cols)

    def update_df(self):
        """Slightly modified to retain previous configurations"""
        # int point value
        self.df["X_AXIS"] = self.df.X_PPM.apply(lambda x: self.uc_f2(x, "ppm"))
        self.df["Y_AXIS"] = self.df.Y_PPM.apply(lambda x: self.uc_f1(x, "ppm"))
        # decimal point value
        self.df["X_AXISf"] = self.df.X_PPM.apply(lambda x: self.uc_f2.f(x, "ppm"))
        self.df["Y_AXISf"] = self.df.Y_PPM.apply(lambda x: self.uc_f1.f(x, "ppm"))
        # in case of missing values (should estimate though)
        self.df["XW_HZ"] = self.df.XW_HZ.replace(np.nan, "20.0")
        self.df["YW_HZ"] = self.df.YW_HZ.replace(np.nan, "20.0")
        # convert linewidths to float
        self.df["XW_HZ"] = self.df.XW_HZ.apply(lambda x: float(x))
        self.df["YW_HZ"] = self.df.YW_HZ.apply(lambda x: float(x))
        # convert Hz lw to points
        self.df["XW"] = self.df.XW_HZ.apply(lambda x: x * self.pt_per_hz_f2)
        self.df["YW"] = self.df.YW_HZ.apply(lambda x: x * self.pt_per_hz_f1)
        # makes an assignment column
        if self.fmt == PeaklistFormat.a2:
            self.df["ASS"] = self.df.apply(
                lambda i: "".join([i["Assign F1"], i["Assign F2"]]), axis=1
            )

        # X and Y radii are retained from the previous run rather than reset
        # self.df["X_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f2_radius
        # self.df["Y_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f1_radius
        self.df["X_RADIUS"] = self.df.X_RADIUS_PPM.apply(
            lambda x: x * self.pt_per_ppm_f2
        )
        self.df["Y_RADIUS"] = self.df.Y_RADIUS_PPM.apply(
            lambda x: x * self.pt_per_ppm_f1
        )
        # add include column if it doesn't exist
        if "include" not in self.df.columns:
            self.df["include"] = self.df.apply(lambda x: "yes", axis=1)

        # check assignments for duplicates
        self.check_assignments()
        # check that peaks are within the bounds of the data
        self.check_peak_bounds()
        self.validate_peaklist()

def get_vclist(vclist, args):
    # read vclist
    if vclist is None:
        vclist = False
    elif vclist.exists():
        vclist_data = np.genfromtxt(vclist)
        args["vclist_data"] = vclist_data
        vclist = True
    else:
        raise Exception("vclist not found...")

    args["vclist"] = vclist
    return args
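
# Hedged usage sketch ("vclist" is a hypothetical whitespace-delimited text
# file with one pseudo-axis value per plane, read with np.genfromtxt):
#     args = get_vclist(Path("vclist"), {})
#     # -> args["vclist"] is True and args["vclist_data"] is a numpy array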