From bae4fde1d39f29d19bda29b55f79e6a4033e29e9 Mon Sep 17 00:00:00 2001
From: nbisaria <nbisaria@namitas-mbp.att.net>
Date: Wed, 31 Dec 2014 10:23:18 -0800
Subject: [PATCH 1/2] Added Boostrapping, fixed issues with missing values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Edited IMlibs to deal with the fact that some of the binding series
data didn’t have 8 points and was interfering with the conversion into
an array.  All directories for saving are pointing to my home directory
and will need to be changed.
Edited variantFun to do the bootstrapping (which is still slow), and
extract the data to a new table.  Right now 20% of the data has NaNs
for error bars, they are either varaitns that only have one cluster, or
variants that for some reason have NaN as a variant number.  I’m
investigating the second reason currently.  The filtering of variants
is just the value > 0.5, but should be changed depending on what is
working well for you.
---
 LibraryTesting/BadVariants.txt |   0
 libs/IMlibs.py                 |  35 ++++++--
 libs/IMlibs.pyc                | Bin 19921 -> 21106 bytes
 libs/variantFun.py             | 156 +++++++++++++++++++++++++++++----
 scripts/analyzeVariants.py     |   8 +-
 scripts/findSeqDistribution.py |   2 +
 6 files changed, 173 insertions(+), 28 deletions(-)
 create mode 100644 LibraryTesting/BadVariants.txt

diff --git a/LibraryTesting/BadVariants.txt b/LibraryTesting/BadVariants.txt
new file mode 100644
index 0000000..e69de29
diff --git a/libs/IMlibs.py b/libs/IMlibs.py
index 071f890..bd0fcca 100644
--- a/libs/IMlibs.py
+++ b/libs/IMlibs.py
@@ -12,13 +12,13 @@
 import pandas as pd
 import scipy.io as sio
 import matplotlib.pyplot as plt
-import CPlibs
+#import CPlibs
 
 def spawnMatlabJob(matlabFunctionCallString):
     try:
         #construct the command-line matlab call 
         functionCallString =                      "try,"
-        functionCallString = functionCallString +      "addpath('{0}', '{1}');".format('/home/sarah/array_image_tools_SKD/libs', '/home/sarah/array_image_tools_SKD/scripts') #placeholder TEMP DEBUG CHANGE
+        functionCallString = functionCallString +      "addpath('{0}', '{1}');".format('/home/namita/Code/array_image_tools_SKD/libs', '/home/namita/Code/array_image_tools_SKD/scripts') #placeholder TEMP DEBUG CHANGE
         functionCallString = functionCallString +     matlabFunctionCallString + ';'
         functionCallString = functionCallString + "catch e,"
         functionCallString = functionCallString +     "disp(getReport(e,'extended'));"
@@ -142,7 +142,8 @@ def getFPfromCPsignal(signalNamesByTileDict):
     return bindingSeriesFilenameDict
 
 def pasteTogetherSignal(cpSeqFilename, signal, outfilename):
-    tmpfilename = 'signal_'+str(time.time())
+    output_directory = os.path.dirname(outfilename)
+    tmpfilename = os.path.join(output_directory, 'signal_'+str(time.time()))
     np.savetxt(tmpfilename, signal, fmt='%s')
     os.system("paste %s %s > %s"%(cpSeqFilename, tmpfilename, outfilename+'.tmp'))
     os.system("mv %s %s"%(outfilename+'.tmp', outfilename))
@@ -218,19 +219,19 @@ def sortConcatenateCPsignal(reducedSignalNamesByTileDict, barcode_col, sortedAll
     return
 
 def compressBarcodes(sortedAllCPsignalFile, barcode_col, seq_col, compressedBarcodeFile):
-    script = '/home/sarah/array_image_tools_SKD/scripts/compressBarcodes.py'
+    script = '/home/namita/Code/array_image_tools_SKD/scripts/compressBarcodes.py'
     print "python %s -i %s -o %s -c %d -C %d"%(script, sortedAllCPsignalFile, compressedBarcodeFile, barcode_col, seq_col)
     os.system("python %s -i %s -o %s -c %d -C %d"%(script, sortedAllCPsignalFile, compressedBarcodeFile, barcode_col, seq_col))
     return
 
 def barcodeToSequenceMap(compressedBarcodeFile, libraryDesignFile, outFile):
-    script = '/home/sarah/array_image_tools_SKD/scripts/findSeqDistribution.py'
+    script = '/home/namita/Code/array_image_tools_SKD/scripts/findSeqDistribution.py'
     print "python %s -b %s -l %s -o %s "%(script, compressedBarcodeFile, libraryDesignFile, outFile)
     os.system("python %s -b %s -l %s -o %s "%(script, compressedBarcodeFile, libraryDesignFile, outFile))
     return
 
 def matchCPsignalToLibrary(barcodeToSequenceFilename, sortedAllCPsignalFile, sequenceToLibraryFilename):
-    script = '/home/sarah/array_image_tools_SKD/scripts/matchCPsignalLibrary.py'
+    script = '/home/namita/Code/array_image_tools_SKD/scripts/matchCPsignalLibrary.py'
     print "python %s -b %s -i %s -o %s "%(script, barcodeToSequenceFilename, sortedAllCPsignalFile, sequenceToLibraryFilename)
     os.system("python %s -b %s -i %s -o %s "%(script, barcodeToSequenceFilename, sortedAllCPsignalFile, sequenceToLibraryFilename))
     return
@@ -338,9 +339,21 @@ def loadCPseqSignal(filename):
     """
     cols = ['tileID','filter','read1_seq','read1_quality','read2_seq','read2_quality','index1_seq','index1_quality','index2_seq', 'index2_quality','all_cluster_signal','binding_series']
     table = pd.read_csv(filename, sep='\t', header=None, names=cols, index_col=False)
+    #we ended here!!
+    #count = 0
+    #    
+    #binding_series = np.zeros((len(table),8))
+    #for series in table['binding_series']:
+    #    if (len(np.array(series.split(','), dtype=float)) ==8):
+    #        binding_series[count] = np.array(series.split(','), dtype=float)
+    #        count += 1
+    #import pdb; pdb.set_trace()
     binding_series = np.array([np.array(series.split(','), dtype=float) for series in table['binding_series']])
+    
     for col in range(binding_series.shape[1]):
+        print('newversion')
         table[col] = binding_series[:, col]
+  
     return table
 
 def loadNullScores(signalNamesByTileDict, filterSet):
@@ -361,7 +374,15 @@ def loadBindingCurveFromCPsignal(filename):
     then return the binding series, normalized by all cluster image.
     """
     table = pd.read_table(filename, usecols=(10,11))
-    binding_series = np.array([np.array(series.split(','), dtype=float) for series in table['binding_series']])
+    count = 0
+    binding_series = np.zeros((len(table),8))
+    for series in table['binding_series']:
+        if (len(np.array(series.split(','), dtype=float)) ==8):
+            binding_series[count] = np.array(series.split(','), dtype=float)
+            count += 1
+     
+    #binding_series = np.array([np.array(series.split(','), dtype=float) for series in table['binding_series']])
+    
     return binding_series, np.array(table['all_cluster_signal'])
 
 def loadFittedCPsignal(filename):
diff --git a/libs/IMlibs.pyc b/libs/IMlibs.pyc
index 81515fb3ce7ed94b027bdfeb85e3ddaa04a5d51c..1eba5aa10ee36228b66b80aa5cba7e0629e209a0 100644
GIT binary patch
delta 4421
zcmbVPdrX_x6+g!>umKyB7(XxuY;24}F>3;}<O#S*L?FqWc3C4;#NnFE)L^hpyRIOw
zBTb*tQr(uW>0{H{Kc-AfOTKQAR!!+vHPcj8&6=r`s&(C(sY>@ywDqqpP1!k@&~0g}
z@nQIW{(bk}^SI|e&hc-a#(%zmb;b{?Z0BE-w~oNIkCDE&(Nl0fR`+1Z2uA=8I6;^c
zsR3bDWDN+5B5OgEDdGZAu1GBi0|+Y|K{()1oL+5k!~h4pAS%=tABajt>OfQ};s;?@
zq#lGrkpPJ5l41=YoQgDps8OT|#2Q6{Al51p0^w4m8APojVGwRbA|O19w1Ds`5(VK?
zq!mP+A~6ttMb?3+SELO@07^Xj8<r>0peE4{qEU^AgJ@Eu14K}fFM$Xta-$MyRxwx)
zBCHxWfQTs438F=jE)Y>gx|dC~Dib$>h^cnl%^=n()}wV9K0#L-txWuDx$$&-A~l{V
zrsDlMk(Pf7*VPu{pWAgR8qgj!oi;>vaQAJ6biNRuIFu=*@|jdTksFyFPfrvJ@xj!g
zWmOTmcdJReyKIk9kIWU@KUO@vFQ$bny}Ddl2$sux?LS_ziEu<a=<uSB@$z`DNuI90
zx^j`QcG0<@$AtFknm_6pFW+{3v~qVMav@kI9qzaEiImHrCsZ=i?y%6)Pfy`#f+D~e
z!k7VO3^0c<Yg}$I!i*6FLeT_8GmM#G4tRij&H%I2WP!0Vm@&Z&sx57VIRhdqE9X08
z22>wi2xf4P8ljdXxi`e!7PiG5^fhYuw2!-3r99_Jp;a!{cx0Q`S4Eb{)q4uX^mx=P
zzv>O#R!t4L>Eh&caad&X>5*bCe^0T3=Q2Jyn#rc=#H6G23JMB34#cZoztzr?Sc75h
zlJ^ASl$@)3R8OQ6{<Zj&yxo6k#X@~C`HlJ-{E>F7ezUG^!#mm7;K$RlvEjsu&4nZC
z2z*F_XJmEb6M6*SZuH{MNGKt1YPwyI;JKy%{#*&R2KVR@TnM`GW%)|*2l%kehQfM8
z&xM-s741^Ut}CeP!cF*VITXID^Wo(!q;ntj;Tu|WB!*a$ds>Dqlv*HppM0?;rpNg;
z(K>un#-jxnAJxyMf>Y7)uPDtYsJmE3P)JV=kL0q+quihl%}72oSzO&om}^~&@5t}8
zW?b99z<h&@#|H7JoQXv`ze{6QXXpKx2QSLSSjMG}>1Q(Dsc3_oUpJI^kSrwCe0f!p
zt2fxy=ED!PFSmJgrKRWEEAX;B+5V!QC6sYDek^|&m-Z*f=OPzeyHmT9^K!7GnXZVb
z4u8))Pv$2CtBc-S9Zp2;QpZP#-<I#)xL=QV6YD*6%RaFFTll)PZAj=5o!j6;xAxqI
zEm$qtcb-{tnBhtJw@!cOGpnA^U{?eBWwvV+FUSjBM|VHTN_Vq5Wn(M9{()ijCVtvo
zk0I^1-R~oQQ@(U_mMlek+VxETk)8lX<;k7|o|l(<wqdR8*qH3=fnqr)y)i47s$NQX
zi95nDYvy`&$4=^?a+Ut?r%Y?+wCAPlM{Lv7^RS=jzNlL}yD^4Aj%YH^@MnSy5)^F&
z6Y0C}qM|O7n~)!G8rsiMpu9|wI+RT>o%eJ;lP)B$a3hC{E`!x=tAh?xB%4c#{=Iak
z?8_XUNM-ku9|e^fO)+e-Z87-Ji~)oE-R5`EAkS>M1sCPjEdf1Y+`F}=bePq0J_od*
z4ejW|N~}XCj_|zyr&xK38kUL`DlVulp<ftth_ggYFl&IK0mi6yQ3VObp2`b~F%c?g
zzJ8~wQY_$2+%c?*7ph-qxEV^kUKTDpSfrfqQPR02ey0KQz1PNl?l3BkXx%o^LqCGL
z$%Uz2{@+jSOWL_KvZ<XwuI!6e(gX_c5ZD^&lbcLWG)l*|_evRRf=oGqTDt4jKKU=|
zh#GhZ?}7~1IZ-uu>A=xzvY8>w5MpR$XkoZY5alQ))yY|8_IC!}BT1kbM}dD%sBNOB
zP&(e*=*@q+b`}?{9<Lg0Uv!$}+1`bca+|ZzwTpq+(srGm%%+nm?y!=A<R@(YIv3Rr
zc2Y+wn;jm>PE+ldAI^-Y4yTi-d}bMX;EP?Zy6)Fb-zGdF1O1Qpyi8)rUlJ$<|3}Mp
zN~m(W(qE0ow93Tqk?NoiZpl_u@g$a%%7fcqN4sp_aY4^9=-TPP$F=P{|AIcH>lf_c
zl0s%@8Rq4{Kr`jz+&~0hlk)@didv3?mHQUu`2ioEm46)w(&b^_rK?1ulP<Hn9C%XB
z><Z&X`Tbp^_$z5044XV;KyDt~T$*Ad_cF{f%rV^0@BqU@4397zV>r(67{jv+-(z^5
z;RS|M3>w23hBp}AWO$e1BEw|{1IM(S!HR=~l}zmnF@|n}|E!+wCA@XW=^G_87DFXn
gHvj$SK!;J@H{_<g_|(u16>e*VwZ>|7lv!*44Mg*wNB{r;

delta 3217
zcma)8eQZ-z6hEikTDq2%v90UYt#n)4bsgK3Ip)A^z=jgRZ9GB45?u<nlR_tDEed9{
zMus3EDje`TV*E%nhD5C)F)@7jhfz=xG|`x#7{6ltLnFp0OvH2Um;`(V+rD3a@1Bo)
z?z#7!^Ul14C!ax|_1EbItGl<aSOXV7R{meWSGsDqZ$}nESs+;i14W>$Fb*&<6I7li
z#h~&vDFJ2Eq!g506A!3qnv{vTa=<vkK)L38TL9x07^nc{)H*&;E=?*yP1mFfRG}vR
zEGV~Tvp~(zBmk;NlWI^iHK_qrtVu1X5>0}jN;L_A@@NtURi;TDD6b|FQ01D`gR0OZ
z3d*NR1E@+(W<yq0Y1RnJuSwI?B4+7DG=mE0@oPX;YjQ298cpVas@0@Ln*?<Zt^*a)
zgRP*#n#=`Nr%4;Ah$h!hO;Hcy9!^v{sHhgq1J$6({9M%X7rMi|OU-LH49A<(v60wX
z`o-&;k*@#OU|DWk-rE-UHUAQXb35&~TFnGh@BGDVJU8U3GV|0#pB27tYVe7>9L)r@
zKVYY>qH`v)P;PwY0aK&gSjktW0(#YR*2GgI{Z{s=$@ubY9w)=bS2thj`xq2L1|Wm5
z4q+?6R)n%KD`YIN&I+Ua)6W4v0b4DInLJpRFHgZJXc-^rd?*{Tf``qfq&66x9Ou6w
z%Vi=vSVpb%ZFvlBG*IDRWlZM)G7?UDOF9!Dj!1OIGJ&{AoD3T_Wrh;T_}#JLxW3N}
zr>5>BECvl}lRc)?oII-WdF?JyLr<>T_cUUPzO3A3l9@0atSZ4L>3G!-rqT<34<5*Q
zW_6fv!~Q@3-=HIbr%jDz%MVnS<3T!B+e6Np<)&J%)Rg1fTEkYm%2eaET0g$S8e}4E
zDokWDadqRxTpgyv!B_BMS{n+PD2M6uP&K}rn+O$}uV-alHNH>x*4^WOcq+&Lo#-Wz
zN<5K^N1}-L(_{66Ha<sSJV3wIM@{EmANAo$+8#~2J6Q4Z2f2*G4Yze^2T!puV`oUm
zHx91ZkTkZ*fIcm2MiQyaRkr_+h7vqQ(`P5#3)t?;SzegE0=Locv%~)9Sb7y#n;X6O
zHSKOpxEGqZ3OB86+|J{Mj#~0%{12bsHC5mbZ#H|)s!G@;%N|PHt(L0vPn5d$Ez>-v
z=9J+C?U+M_ds#hWW$2Cd8vCfeCCEp)x23XnpG5N(!&O#ztfdGccdF$q;v@8F>+PoV
zZJF!EX|!kVgZLqpw)L3myxdm7kJ^#8PMonvrj?+KL4n7qu)Wg%%++Jr);<e;^m6+U
zeoU@;54Alm*1ck<ops6W7Xtsomu-F(*5rcozemiHvm?naH+M9dMt-Klk9Abk*@MUF
zbX$<_>#W35dZN>)ZiUP=80FgMkUm?^wXWxm0F33!824LMO71QnuezZtip8>OW23+~
z3<+eQdl%f|lZZ3<0<qP}_~bE;k0j!0;~bq{;4uwp;lkq5KK78-B`hzBb=Zj&=to+=
z@H7G~U9=d_)9yunQ|pOEGjV_lyQ_53mlM^^S9+bq08;L`igWen7nZGvW84OG&A*FF
zI}heTdCwg|7_&^Z0v7UW^QA%XOAoJvjqbF-#;}ZV$KYOZv1zb^U1y1_|E&=RuYaQZ
zOx8HZ^Il4fah7i*7KTKDfRy@(Jlhu!i?E=8v~{CSV1i+?L+B%LUdH|s5YPX8zMEqs
zBe5;?yEi~b7YE#N$)`*Nwo=X}om+e$t7Ex*^2x2*luE{pn9N~kGviO0NcPaU6oE{h
zDPo8vlY?uLo4AjS3?_zScg2khqR!VOT-l>0;}TVx@K(~Qn>@IIQaA0bJ<Jxy(J3?S
z;7VJ5;|Y^mG6Of~HZ3`Y+%}AxlTN3^IB88E-24%`=!vDDm>!LT%iNgFy|?T~tk6bB
zWXgZ$H4e$`K04ML<O=$8Zy1k}r?1&r!v1AH!8lL-eHD0!*7wz5fF9{H?=ctpig6zm
z_lNKXYVRMy_vrclklVvv^zrZRPbOBU>4W}`Y)YKU+lawu5{w50_>~)shXi&A>=f7~
z@TkCJ0?!KU7kEM7MS<4^-V`_}a6;guz?TAF3;ZMik}R8m9a|X-gt`PG0<8>}_Rf40
zPA+e|ZY}$=SzLJe*Ntv;SsZl^ufyta^JR18y9*sQ{hdbhj7F!^;dB%@9B#YA^Ec_8
B3lRVS

diff --git a/libs/variantFun.py b/libs/variantFun.py
index 308b26a..83f3a98 100644
--- a/libs/variantFun.py
+++ b/libs/variantFun.py
@@ -10,36 +10,93 @@
 import plotfun
 import scipy.io as sio
 import matplotlib.pyplot as plt
-import CPlibs
+#import CPlibs
+import scikits.bootstrap
+import scipy as scipy
+from multiprocessing import Pool
+import functools
 
 class Parameters():
     def __init__(self):
-    
         # save the units of concentration given in the binding series
         self.RT = 0.582  # kcal/mol at 20 degrees celsius
         self.min_deltaG = -12
         self.max_deltaG = -3
-        
+
+                
 def perVariantInfo(table):
-    variants = np.arange(0, np.max(table['variant_number']))
-    columns = [name for name in table][:12] + ['numTests', 'numRejects', 'kd', 'dG', 'fmax', 'fmin']
+    #variants_1 = np.arange(0, np.max(table['variant_number']),dtype=int)
+    variants = range(0,int(np.max(table['variant_number'])))
+    #something is wrong with 595,681
+    wd = '/home/namita/RigidAnalysis' 
+    filename = os.path.join(wd, 'BadVariants.txt')
+    f = open(filename, 'w')
+
+    columns = [name for name in table][:12] + ['numTests', 'numRejects', 'kd', 'dG', 'fmax', 'fmin','qvalue', 'errdG_L', 'errdG_R', ]
     newtable = pd.DataFrame(columns=columns, index=np.arange(len(variants)))
-    for i, variant in enumerate(variants):
-        if i%1000==0: print 'computing iteration %d'%i
-        sub_table = table[table['variant_number']==variant]
-        if len(sub_table) > 0:
-            sub_table_filtered = filterFitParameters(sub_table)[['kd', 'dG', 'fmax', 'fmin']]
-            newtable.iloc[i]['numTests'] = len(sub_table_filtered)
-            newtable.iloc[i]['numRejects'] = len(sub_table) - len(sub_table_filtered)
-            newtable.iloc[i]['kd':] = np.median(sub_table_filtered, 0)
-            newtable.iloc[i][:'total_length'] = sub_table.iloc[0][:'total_length']
-    return newtable
+    pool = Pool(processes=20)   
+    datapervar= pool.map(functools.partial(generateVarStats, table, f), variants)
+    #newtable.iloc[variants] = pool.map(functools.partial(generateVarStats, table), variants)
+    pool.close()
+    f.close()
+    print('here!')
+    for variant in variants:
+        #this is really slow, there's got to be a better way of unpacking datapervar
+        if (variant % 1000) == 0:
+            print(variant)
+        newtable.iloc[variant] = datapervar[variant].iloc[0]
+    
+    return newtable    
+    
+def versionNum():
+    print('v3')
+    
+    
+    
+def generateVarStats(table,f,variant):
+    sub_table = table[table['variant_number']==variant]
+    #how do you intialize a list?
+   # print(variant)
+   #doesn't like 15356
+    columns = [name for name in table][:12] + ['numTests', 'numRejects', 'kd', 'dG', 'fmax', 'fmin','qvalue', 'errdG_L', 'errdG_R', ]
+    newtable_pervar = pd.DataFrame(columns=columns, index = xrange(0,1))
+    if len(sub_table) > 0:        
+        sub_table_filtered = filterFitParameters(sub_table)[['kd', 'dG', 'fmax','qvalue', 'fmin']]
+        if not(sub_table_filtered.empty):
+            newtable_pervar.iloc[0]['numTests'] = len(sub_table_filtered)
+            newtable_pervar.iloc[0]['numRejects'] = len(sub_table) - len(sub_table_filtered)
+            newtable_pervar.iloc[0]['kd':'qvalue'] = np.median(sub_table_filtered, 0)
+            newtable_pervar.iloc[0][:'total_length'] = sub_table.iloc[0][:'total_length']
+        if len(sub_table_filtered) > 1:
+                    #get bootstrapped error bars on mean, would have liked to do median but get errors...   
+                    try:
+                        if (variant % 1000) == 0:
+                            print(variant)
+                        bootval = scikits.bootstrap.ci(data=sub_table_filtered['dG'], statfunction=np.median, method='pi')    
+                        newtable_pervar.iloc[0]['errdG_L'] = bootval[0]-np.median(sub_table_filtered, 0)[1]
+                        newtable_pervar.iloc[0]['errdG_R'] = bootval[1]-np.median(sub_table_filtered, 0)[1]
+                    except IndexError:
+                        print('value error')
+                        print(variant)
+                        indexbad = np.array(variant)
+                        np.savetxt(filename, indexbad)
+                        #could also use 
+                        f.write('%d\t\n'%variant)
+                        
+                        #save to text file the name of the variant, could do an np.savetxt,         
+                    
+                  
+    return newtable_pervar
         
 
 def filterFitParameters(sub_table):
-    sub_table = sub_table[sub_table['fit_success']==1]
+    #sub_table = sub_table[sub_table['fit_success']==1]
     sub_table = sub_table[sub_table['rsq']>0.5]
-    sub_table = sub_table[sub_table['fraction_consensus']>=67] # 2/3 majority
+    #find where the binding series starts
+    firstconc = int(np.where(sub_table.columns == 0)[0])
+    numconcs = 8
+    #filters out anything that has a single NaN concentration
+    sub_table = sub_table[sub_table.iloc[:,firstconc:firstconc+numconcs].isnull().sum(1) == 0 ]
     return sub_table
 
 def bindingCurve(concentrations, kd, fmax=None, fmin=None):
@@ -262,7 +319,6 @@ def plot_length_changes_helices(table, variant_table, topology, loop=None, recep
     # choose just one sequence of that topology to look at
     seq1 = variant_table[criteria_central]['junction_sequence'].iloc[0]
     criteria_central = np.all((criteria_central, np.array(variant_table['junction_sequence']) == seq1), axis=0)
-    
     sub_table = variant_table[criteria_central]
     helix_context = np.unique(sub_table['helix_context'])
     total_lengths = np.array([8,9,10,11,12])
@@ -286,4 +342,66 @@ def plot_length_changes_helices(table, variant_table, topology, loop=None, recep
     ax.set_ylabel('delta delta G')
     ax.set_ylim((-1, 5))
     plt.tight_layout()
-    return
\ No newline at end of file
+    return
+    
+#def helixContextAnalysis(table, variant_table, topology, loop = 'goodLoop', receptor = 'R1', offset = 0):
+#    if loop is None:
+#        loop = 'goodLoop'
+#    if receptor is None:
+#        receptor='R1'
+#    if offset is None:
+#        offset = 0  # amount to change helix_one_length by from default
+#    couldPlot = True
+#    criteria_central = np.all((np.array(variant_table['receptor'] == receptor),
+#                           np.array(variant_table['loop']==loop)),
+#                          axis=0)
+#    length = 10
+#    if topology == '':
+#        criteria_central = np.all((criteria_central, np.array(variant_table['junction_sequence'] =='_'),np.array(variant_table['total_length']==length)), axis=0)
+#    else:
+#        criteria_central = np.all((criteria_central, np.array(variant_table['topology']==topology),np.array(variant_table['total_length']==length)), axis=0)
+#    # choose just one sequence of that topology to look at
+#    #seq1 = variant_table[criteria_central]['junction_sequence'].iloc[0]
+#    #criteria_central = np.all((criteria_central, np.array(variant_table['junction_sequence']) == seq1), axis=0)
+#    sub_table = variant_table[criteria_central]
+#    helix_context = np.unique(sub_table['helix_context'])
+#    delta_G_initial = variant_table.loc[0]['dG']
+#    newtable = pd.DataFrame(columns=['helix_context','dG', 'ddG', 'likelihood'], index = xrange(0,len(sub_table)))
+#    count = 0
+#    for i, sequence in enumerate(helix_context):
+#        dG = sub_table[np.all((np.array(sub_table['helix_context']==sequence)),axis=0)]['dG']
+#        count = count + len(dG)
+#        delta_deltaG = dG - delta_G_initial
+#        newtable.iloc[i]['helix_context'] = sequence
+#        newtable.iloc[i]['dG'] =  dG.iloc[0]
+#        newtable.iloc[i]['ddG'] = delta_deltaG
+#        
+#        
+#    #next time load mploadtext. 
+#    for i, sequence in enumerate(np.unique(newtable['helix_context'])):
+#        if sequence == 'h02':
+#            newtable.iloc[i]['likelihood'] = -4.62
+#        if sequence == 'h06':
+#            newtable.iloc[i]['likelihood'] = -5.16
+#        if sequence == 'h08':
+#            newtable.iloc[i]['likelihood'] = -5.30
+#        if sequence == 'h10':
+#            newtable.iloc[i]['likelihood'] = -5.54
+#        if sequence == 'h12':
+#            newtable.iloc[i]['likelihood'] = -5.64
+#        if sequence == 'h14':
+#            newtable.iloc[i]['likelihood'] = -5.67
+#        if sequence == 'h16':
+#            newtable.iloc[i]['likelihood'] = -6.15
+#        if sequence == 'h21':
+#            newtable.iloc[i]['likelihood'] = -6.44
+#        if sequence == 'h25':
+#            newtable.iloc[i]['likelihood'] = -6.57
+#        if sequence == 'h28':
+#            newtable.iloc[i]['likelihood'] = -6.76
+            
+        
+            
+    
+    
+    
diff --git a/scripts/analyzeVariants.py b/scripts/analyzeVariants.py
index 1f45496..c9aca43 100644
--- a/scripts/analyzeVariants.py
+++ b/scripts/analyzeVariants.py
@@ -21,6 +21,8 @@
 import pandas as pd
 import variantFun
 import IMlibs
+import multiprocessing
+import plotfun
 parameters = variantFun.Parameters()
 
 #set up command line argument parser
@@ -38,14 +40,16 @@
 args = parser.parse_args()
 
 # outdirectory
-imageDirectory = 'imageAnalysis/reduced_signals/barcode_mapping/figs'
+#imageDirectory = 'imageAnalysis/reduced_signals/barcode_mapping/figs'
+imageDirectory  = '/home/namita/RigidAnalysis/figs'
+
 # load concentrations
 xValuesFilename, fluor_dirs_red, fluor_dirs, concentrations = IMlibs.loadMapFile(args.CPfluor_dirs_to_quantify)
 
 # load table
 fittedBindingFilename = args.CPfitted
 table = IMlibs.loadFittedCPsignal(fittedBindingFilename)
-table['dG'] = parameters.RT*np.log(table['kd']*1E-9)
+#table['dG'] = parameters.RT*np.log(table['kd']*1E-9)
 
 # get another dict that gives info on a per variant basis
 variantFittedFilename = os.path.splitext(fittedBindingFilename)[0]+'.perVariant.fitParameters'
diff --git a/scripts/findSeqDistribution.py b/scripts/findSeqDistribution.py
index 1f4b826..815ec4e 100644
--- a/scripts/findSeqDistribution.py
+++ b/scripts/findSeqDistribution.py
@@ -10,6 +10,8 @@
 import numpy as np
 from optparse import OptionParser
 import matplotlib.pyplot as plt
+import matplotlib 
+matplotlib.use('Agg')
 import pandas as pd
 import sys
 import os

From 567a6ad03e12107e660d271bd657148befe22c8b Mon Sep 17 00:00:00 2001
From: nbisaria <nbisaria@dn52emja.sunet>
Date: Tue, 6 Jan 2015 19:12:20 -0800
Subject: [PATCH 2/2] Cleaned up comments

Got ride of non-sensical comments and debugging code
---
 libs/IMlibs.py     |  3 ---
 libs/variantFun.py | 67 ++--------------------------------------------
 2 files changed, 2 insertions(+), 68 deletions(-)

diff --git a/libs/IMlibs.py b/libs/IMlibs.py
index bd0fcca..cdc9db5 100644
--- a/libs/IMlibs.py
+++ b/libs/IMlibs.py
@@ -339,7 +339,6 @@ def loadCPseqSignal(filename):
     """
     cols = ['tileID','filter','read1_seq','read1_quality','read2_seq','read2_quality','index1_seq','index1_quality','index2_seq', 'index2_quality','all_cluster_signal','binding_series']
     table = pd.read_csv(filename, sep='\t', header=None, names=cols, index_col=False)
-    #we ended here!!
     #count = 0
     #    
     #binding_series = np.zeros((len(table),8))
@@ -347,11 +346,9 @@ def loadCPseqSignal(filename):
     #    if (len(np.array(series.split(','), dtype=float)) ==8):
     #        binding_series[count] = np.array(series.split(','), dtype=float)
     #        count += 1
-    #import pdb; pdb.set_trace()
     binding_series = np.array([np.array(series.split(','), dtype=float) for series in table['binding_series']])
     
     for col in range(binding_series.shape[1]):
-        print('newversion')
         table[col] = binding_series[:, col]
   
     return table
diff --git a/libs/variantFun.py b/libs/variantFun.py
index 83f3a98..5354126 100644
--- a/libs/variantFun.py
+++ b/libs/variantFun.py
@@ -25,9 +25,7 @@ def __init__(self):
 
                 
 def perVariantInfo(table):
-    #variants_1 = np.arange(0, np.max(table['variant_number']),dtype=int)
     variants = range(0,int(np.max(table['variant_number'])))
-    #something is wrong with 595,681
     wd = '/home/namita/RigidAnalysis' 
     filename = os.path.join(wd, 'BadVariants.txt')
     f = open(filename, 'w')
@@ -36,7 +34,6 @@ def perVariantInfo(table):
     newtable = pd.DataFrame(columns=columns, index=np.arange(len(variants)))
     pool = Pool(processes=20)   
     datapervar= pool.map(functools.partial(generateVarStats, table, f), variants)
-    #newtable.iloc[variants] = pool.map(functools.partial(generateVarStats, table), variants)
     pool.close()
     f.close()
     print('here!')
@@ -47,12 +44,7 @@ def perVariantInfo(table):
         newtable.iloc[variant] = datapervar[variant].iloc[0]
     
     return newtable    
-    
-def versionNum():
-    print('v3')
-    
-    
-    
+        
 def generateVarStats(table,f,variant):
     sub_table = table[table['variant_number']==variant]
     #how do you intialize a list?
@@ -344,62 +336,7 @@ def plot_length_changes_helices(table, variant_table, topology, loop=None, recep
     plt.tight_layout()
     return
     
-#def helixContextAnalysis(table, variant_table, topology, loop = 'goodLoop', receptor = 'R1', offset = 0):
-#    if loop is None:
-#        loop = 'goodLoop'
-#    if receptor is None:
-#        receptor='R1'
-#    if offset is None:
-#        offset = 0  # amount to change helix_one_length by from default
-#    couldPlot = True
-#    criteria_central = np.all((np.array(variant_table['receptor'] == receptor),
-#                           np.array(variant_table['loop']==loop)),
-#                          axis=0)
-#    length = 10
-#    if topology == '':
-#        criteria_central = np.all((criteria_central, np.array(variant_table['junction_sequence'] =='_'),np.array(variant_table['total_length']==length)), axis=0)
-#    else:
-#        criteria_central = np.all((criteria_central, np.array(variant_table['topology']==topology),np.array(variant_table['total_length']==length)), axis=0)
-#    # choose just one sequence of that topology to look at
-#    #seq1 = variant_table[criteria_central]['junction_sequence'].iloc[0]
-#    #criteria_central = np.all((criteria_central, np.array(variant_table['junction_sequence']) == seq1), axis=0)
-#    sub_table = variant_table[criteria_central]
-#    helix_context = np.unique(sub_table['helix_context'])
-#    delta_G_initial = variant_table.loc[0]['dG']
-#    newtable = pd.DataFrame(columns=['helix_context','dG', 'ddG', 'likelihood'], index = xrange(0,len(sub_table)))
-#    count = 0
-#    for i, sequence in enumerate(helix_context):
-#        dG = sub_table[np.all((np.array(sub_table['helix_context']==sequence)),axis=0)]['dG']
-#        count = count + len(dG)
-#        delta_deltaG = dG - delta_G_initial
-#        newtable.iloc[i]['helix_context'] = sequence
-#        newtable.iloc[i]['dG'] =  dG.iloc[0]
-#        newtable.iloc[i]['ddG'] = delta_deltaG
-#        
-#        
-#    #next time load mploadtext. 
-#    for i, sequence in enumerate(np.unique(newtable['helix_context'])):
-#        if sequence == 'h02':
-#            newtable.iloc[i]['likelihood'] = -4.62
-#        if sequence == 'h06':
-#            newtable.iloc[i]['likelihood'] = -5.16
-#        if sequence == 'h08':
-#            newtable.iloc[i]['likelihood'] = -5.30
-#        if sequence == 'h10':
-#            newtable.iloc[i]['likelihood'] = -5.54
-#        if sequence == 'h12':
-#            newtable.iloc[i]['likelihood'] = -5.64
-#        if sequence == 'h14':
-#            newtable.iloc[i]['likelihood'] = -5.67
-#        if sequence == 'h16':
-#            newtable.iloc[i]['likelihood'] = -6.15
-#        if sequence == 'h21':
-#            newtable.iloc[i]['likelihood'] = -6.44
-#        if sequence == 'h25':
-#            newtable.iloc[i]['likelihood'] = -6.57
-#        if sequence == 'h28':
-#            newtable.iloc[i]['likelihood'] = -6.76
-            
+