Source code for sincei.FragmentFFT

import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt

import scipy as sp
import scipy.fftpack
import numpy as np
import pandas as pd


## fast fouriour transform
[docs]def ffttable(selected): r"""Computes the FFT of the fragment length distribution Parameters ---------- selected : numpy array Array of fragment lengths Returns ------- pandas dataframe Dataframe with two columns. The first column contains the frequencies and the second column the power spectrum. Examples -------- >>> test = Tester() >>> c = CountReadsPerBin([test.bamFile1, test.bamFile2], 50, 4) >>> num_reads_per_bin, regionList = c.run() >>> selected = num_reads_per_bin[:,0] >>> ffttable(selected) freq value 0 0.0 7.812500e+06 1 1.0 1.812500e+06 2 2.0 1.812500e+06 3 3.0 1.812500e+06 4 4.0 1.812500e+06 5 5.0 1.812500 """ selected = np.log2(selected + 1) selected2 = [y - x for x, y in zip(selected, selected[1:])] fragment_fft = sp.fftpack.fft(selected2) fragment_psd = np.abs(fragment_fft) ** 2 fftfreq = sp.fftpack.fftfreq(len(fragment_psd), 10.0) d = pd.DataFrame({"freq": fftfreq, "value": fragment_psd}) d2 = d[d.freq > 0] return d2
# get fragment size disribution and fft value from dict{barcode:fragment_size_list}
[docs]def fragment_distribution(fragment_len_dict, length_plot): r"""Plot fragment length distribution Parameters ---------- fragment_len_dict : dict Dictionary containing the fragment length for each barcode. length_plot : str File name to save the plot. Returns ------- dict Dictionary containing the top 20 frequencies for each barcode. Examples -------- >>> test = Tester() >>> fragment_len_dict = {'AAACCTGAGAGGTTCT': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000], ... 'AAACCTGAGAGGTTCT': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]} >>> fragment_distribution(fragment_len_dict, test.length_plot) """ plt.style.use("classic") fig = plt.figure() ax1 = fig.add_subplot(1, 1, 1) outdict = dict.fromkeys(fragment_len_dict.keys()) for barcode in outdict.keys(): fragment_len = fragment_len_dict[barcode] n, bins, patches = ax1.hist(fragment_len, bins=100, color="orange", range=(0, 1000), alpha=0.3) # calculating fft dflist = [] for num in range(80, 101, 1): dflist.append(ffttable(n[10:num])) d2 = pd.concat(dflist, ignore_index=True) d2 = d2.sort_values(by=["freq"], ascending=False) outdict[barcode] = d2 # plot fragment sizes ax1.xaxis.set_ticks_position("bottom") ax1.yaxis.set_ticks_position("left") plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0)) plt.xlabel("Fragment length", fontsize=20) plt.ylabel("Fragment count", fontsize=20) plt.xticks(fontsize=20) plt.yticks(fontsize=20) ax1.set_title("Fragment distribution", fontsize=20) plt.savefig(length_plot, dpi=300, bbox_inches="tight") plt.close(fig) return outdict
## plot the fragment periodicity per barcode using the output of
[docs]def fftplot(outdict, plot): r"""Computes the periodicity of the fragment distribution Parameters ---------- outdict : dict Dictionary containing the fragment distribution for each barcode plot : str File name to save the plot Returns ------- dict Dictionary containing the periodicity of the fragment distribution for each barcode Examples -------- >>> test = Tester() >>> d = test.get_fragment_distribution() >>> fftplot(d, 'test.png') """ plt.style.use("classic") fig = plt.figure() ax = fig.add_subplot(1, 1, 1) periodicity = dict.fromkeys(outdict.keys()) for barcode in outdict.keys(): d2 = outdict[barcode] max_y_pos = 1 / d2.loc[d2.value.idxmax(), "freq"] mean_value = np.mean(d2.value) ## color barcodes where max periodicity is not between 120 and 160, or 240 and 320 p1 = range(120, 160, 1) p2 = range(240, 320, 1) mononuc_periodicity = np.mean(d2.loc[[int(x) in p1 for x in 1 / d2.freq], "value"]) dinuc_periodicity = np.mean(d2.loc[[int(x) in p2 for x in 1 / d2.freq], "value"]) # if int(mean_value) in p1: # col='red' # elif int(mean_value) in p2: # col='blue' # else: col = "grey" ax.plot(1 / d2.freq, 10 * np.log10(d2.value + 1), color=col, alpha=0.2) periodicity[barcode] = [mononuc_periodicity, dinuc_periodicity] # ax.axvline(max_y_pos,color = 'r',linestyle= '--') plt.xticks(fontsize=20) plt.yticks(fontsize=20) ax.set_xlim(0, 400) ax.set_xlabel("Period (bp)", fontsize=20) ax.set_ylabel("Power", fontsize=20) ax.set_title("Period of fragment distribution", fontsize=20) plt.savefig(plot, dpi=300, bbox_inches="tight") return periodicity