Showing 1 of 1 files from the diff.

@@ -309,10 +309,17 @@
Loading
309 309
    for i, file_ene in enumerate(enelist):
310 310
        rel_enes.append([])
311 311
        for mol_ene in file_ene:
312 -
            if method == 'subtract':
313 -
                rel = mol_ene - mol_ene[ref_conf_int]
314 -
            elif method == 'divide':
315 -
                rel = mol_ene/mol_ene[ref_conf_int]
312 +
            try:
313 +
                if method == 'subtract':
314 +
                    rel = mol_ene - mol_ene[ref_conf_int]
315 +
                elif method == 'divide':
316 +
                    rel = mol_ene/mol_ene[ref_conf_int]
317 +
318 +
            # for when ref_conf_int == 'all' which may call
319 +
            # estimate_variability(r) for r higher than nconfs
320 +
            except IndexError:
321 +
                #print('blank list at {} for ref {}'.format(i, ref_conf_int))
322 +
                rel = []
316 323
            rel_enes[i].append(rel)
317 324
    return rel_enes
318 325
@@ -363,7 +370,8 @@
Loading
363 370
        cvlist = []
364 371
365 372
        # check that at least two conformers exist
366 -
        num_confs = mols_ene[0].shape[0]
373 +
        # call np.array in case empty list is inside from relative_energies
374 +
        num_confs = np.array(mols_ene[0]).shape[0]
367 375
        if num_confs <= 1:
368 376
            print(f"skipping molecule {i} since only 0 or 1 conformer")
369 377
            spread_by_mol.append(np.nan)
@@ -623,7 +631,7 @@
Loading
623 631
    return wholedict
624 632
625 633
626 -
def survey_energies(wholedict, mol_slice=[], outfn='relene.dat', ref_conf=0):
634 +
def survey_energies(wholedict, mol_slice=[], outfn='relene.dat', ref_conf='all'):
627 635
    """
628 636
    Compute the spread of the conformer energies for each molecule in each file.
629 637
    The spread can be computed as the coefficient of variation of all methods,
@@ -675,7 +683,7 @@
Loading
675 683
        nanlist.append(confNans)
676 684
        wholedict[i]['titleMols'] = titleMols
677 685
678 -
    print("Removing mols missing from 1+ files...")
686 +
    print("  Removing any mols missing from 1+ files...")
679 687
    # find molecules common in all files
680 688
    molname_sets = [set(wholedict[x]['titleMols']) for x in range(num_files)]
681 689
    molname_common = set.intersection(*molname_sets)
@@ -695,59 +703,112 @@
Loading
695 703
            del enelist[i][index]
696 704
            del idxlist[i][index]
697 705
            del nanlist[i][index]
706 +
        wholedict[i]['confNums'] = idxlist[i]
698 707
699 -
    print("Removing un-finished conformers and computing relative energies...")
708 +
    print("  Removing any unfinished conformers and computing relative energies...")
700 709
    idxlist, enelist = remove_dead_conformers(enelist, idxlist, nanlist)
701 710
702 -
    # scale energies: scale energies by subtracting or dividing conf_j
703 -
    # by conf_i for all j confs in all mols for all files
704 -
    # (1) subtract first conf, or (2) divide first conf
705 -
    if isinstance(ref_conf, int):
706 -
        ref_conf_int = ref_conf
707 -
    rel_enelist = relative_energies(enelist, 'subtract', ref_conf_int)
708 -
    rat_enelist = relative_energies(enelist, 'divide', ref_conf_int)
709 -
710 -
    for i in range(num_files):
711 -
        wholedict[i]['compared_enes'] = rel_enelist[i]
712 -
        wholedict[i]['confNums'] = idxlist[i]
713 -
714 -
    # check that entire array is not zero, if ALL mols have one conf
715 -
    all_zeros = np.all(np.asarray(rel_enelist)==0)
716 -
    if all_zeros:
717 -
        print("WARNING: All relative energy values are zero. "
718 -
            "Cannot calculate energy spread.")
719 -
        spreadlist = [-1]*len(rel_enelist[0])
720 -
    else:
721 -
        # estimate spread of data on energies
722 -
        spreadlist, cvlist_all_mols = avg_coeffvar(rat_enelist, ref_conf_int)
723 -
        #spreadlist = normalized_deviation(rel_enelist)
711 +
    def estimate_variability(ref_conf_int):
724 712
725 -
    # loop over each mol and write energies from wholedict by column
726 -
    for m in range(len(wholedict[1]['titleMols'])):
727 -
        compF.write('\n\n# Mol ' + wholedict[1]['titleMols'][m])
713 +
        rel_enelist = relative_energies(enelist, 'subtract', ref_conf_int)
714 +
        rat_enelist = relative_energies(enelist, 'divide', ref_conf_int)
715 +
        for i in range(num_files):
716 +
            wholedict[i]['compared_enes'] = rel_enelist[i]
728 717
729 -
        # for this mol, write the rmsd from each file side by side
730 -
        line = ' {:.2f} ppm averaged CV'.format(spreadlist[m]*1000000)
731 -
        compF.write(line)
732 -
        compF.write('\n# ==================================================================')
733 -
        compF.write(f'\n# rel enes (kcal/mol), column i = file i, row j = conformer j')
718 +
        # check that entire array is not zero, if ALL mols have one conf
719 +
        all_zeros = np.all(np.asarray(rel_enelist)==0)
720 +
        if all_zeros:
721 +
            print("WARNING: All relative energy values are zero. "
722 +
                "Cannot calculate energy spread.")
723 +
            spreadlist = [-1]*len(rel_enelist[0])
724 +
        else:
725 +
            # estimate spread of data on energies
726 +
            spreadlist, cvlist_all_mols = avg_coeffvar(rat_enelist, ref_conf_int)
727 +
            #spreadlist = normalized_deviation(rel_enelist)
734 728
735 -
        # for this mol, write the compared_enes from each file by columns
736 -
        this_mol_conf_inds = wholedict[1]['confNums'][m]
737 -
        for c in range(len(this_mol_conf_inds)):
738 -
            line = '\n' + str(this_mol_conf_inds[c]) + '\t'
739 -
            for i in range(num_files):
740 -
                line += '{:.4f}\t'.format(wholedict[i]['compared_enes'][m][c])
741 -
742 -
            # print overall contribution to spread, skip ref conf since scaled
743 -
            # include second condition in case ref_conf_int is negative
744 -
            if c == ref_conf_int or c == len(this_mol_conf_inds)+ref_conf_int:
745 -
                line += '#  nan ppm CV'
746 -
            else:
747 -
                line += '# {:.2f} ppm CV'.format(cvlist_all_mols[m][c-1]*1000000)
748 -
            compF.write(line)
729 +
        return spreadlist, cvlist_all_mols
749 730
750 -
    compF.close()
731 +
    # scale energies: scale energies by subtracting or dividing conf_j
732 +
    # by conf_i for all j confs in all mols for all files
733 +
    # (1) subtract first conf, or (2) divide first conf
734 +
    if isinstance(ref_conf, int):
735 +
        spreadlist, cvlist_all_mols = estimate_variability(ref_conf)
736 +
737 +
        # loop over each mol and write energies from wholedict by column
738 +
        for m in range(len(wholedict[1]['titleMols'])):
739 +
            compF.write('\n\n# Mol ' + wholedict[1]['titleMols'][m])
740 +
            this_mol_conf_inds = wholedict[1]['confNums'][m]
741 +
742 +
            # for this mol, write spread from all files
743 +
            compF.write(' {:.2f} ppm averaged CV'.format(spreadlist[m]*1000000))
744 +
            compF.write('\n# ==================================================================')
745 +
            compF.write(f'\n# rel enes (kcal/mol), column i = file i, row j = conformer j')
746 +
747 +
            # for this mol, write compared_enes from each file by columns
748 +
            for c in range(len(this_mol_conf_inds)):
749 +
                line = '\n' + str(this_mol_conf_inds[c]) + '\t'
750 +
                for i in range(num_files):
751 +
                    line += '{:.4f}\t'.format(wholedict[i]['compared_enes'][m][c])
752 +
753 +
                # print overall contribution to spread, skip ref conf since scaled
754 +
                # include second condition in case ref_conf_int is negative
755 +
                if c == ref_conf or c == len(this_mol_conf_inds)+ref_conf:
756 +
                    line += '#  nan ppm CV'
757 +
                else:
758 +
                    line += '# {:.2f} ppm CV'.format(cvlist_all_mols[m][c-1]*1000000)
759 +
                compF.write(line)
760 +
761 +
    elif ref_conf == 'all':
762 +
763 +
        allref_spreadlist = [] # list[i][j] is ith ref conformer index, jth molecule
764 +
        allref_allmol_fileconf = {}  # dict[i][j][k] is ith ref conformer index, jth molecule, kth conformer
765 +
766 +
        highest_numconfs = max([len(l) for l in wholedict[1]['confNums']])
767 +
        print('  The highest number of conformers is ', highest_numconfs)
768 +
769 +
        for r in range(highest_numconfs):
770 +
            print('  Comparing to reference conformer {}...'.format(r))
771 +
            spreadlist, cvlist_all_mols = estimate_variability(r)
772 +
            allref_spreadlist.append(spreadlist)
773 +
            allref_allmol_fileconf[r] = cvlist_all_mols
774 +
775 +
        allref_spreadlist = np.asarray(allref_spreadlist).T # array[i][j] is ith molecule, jth reference index
776 +
777 +
        # loop over each mol and write energies from wholedict by column
778 +
        for m in range(len(wholedict[1]['titleMols'])):
779 +
780 +
            # write heading
781 +
            compF.write('\n\n# Mol ' + wholedict[1]['titleMols'][m])
782 +
            this_mol_conf_inds = wholedict[1]['confNums'][m]
783 +
784 +
            # (average over all references) of (average over all conformers of some reference)
785 +
            # only take up to how many conformers present since nan for ref_conf > num_confs
786 +
            avg_avg = np.mean(allref_spreadlist[m][:len(this_mol_conf_inds)])
787 +
            print('\n\n\n', allref_spreadlist[m], avg_avg)
788 +
789 +
            # for this mol, write average of average
790 +
            compF.write(' {:.4f} ppm averaged CV'.format(avg_avg*1000000))
791 +
            compF.write('\n# ==================================================================')
792 +
            compF.write('\n# \t' + '\t\t'.join(map(str, this_mol_conf_inds)))
793 +
            compF.write('\n# ==================================================================')
794 +
795 +
            # for this mol, write cv contribution from each reference by columns
796 +
            for c in range(len(this_mol_conf_inds)):
797 +
                line = '\n' + str(this_mol_conf_inds[c]) + '\t'
798 +
                for cref in range(len(this_mol_conf_inds)):
799 +
                    # diagonal is not applicable
800 +
                    if cref==c:
801 +
                        line += ' nan\t'
802 +
                        continue
803 +
                    line += '{:.4f}\t'.format(allref_allmol_fileconf[cref][m][c-1]*1000000)
804 +
                compF.write(line)
805 +
806 +
            # write (average over all conformers of some reference)
807 +
            this_mol_avgs = allref_spreadlist[m][:len(this_mol_conf_inds)]
808 +
            this_mol_avgs = ['{:.4f}'.format(flt*1000000) for flt in this_mol_avgs]
809 +
            compF.write('\nav\t' + '\t'.join(this_mol_avgs))
810 +
811 +
        compF.close()
751 812
752 813
    return wholedict
753 814
Files Coverage
quanformer 86.34%
Project Totals (11 files) 86.34%
575.1
TRAVIS_OS_NAME=osx
575.2
TRAVIS_PYTHON_VERSION=3.6
TRAVIS_OS_NAME=linux
1
# Codecov configuration to make it a bit less noisy
2
coverage:
3
  status:
4
    patch: false
5
    project:
6
      default:
7
        threshold: 50%
8
comment:
9
  layout: "header"
10
  require_changes: false
11
  branches: null
12
  behavior: default
13
  flags: null
14
  paths: null
15
ignore:
16
  - "quanformer/match_plot.py"
17

18
  - "quanformer/quan2modsem.py"
19
  - "quanformer/match_minima.py"
20
  - "quanformer/opt_vs_spe.py"
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, a single file. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading