Home > Class & module > 3.Collection > GeneService

GeneService

 

This class includes gene analysis related modules.

 


o Summary

 

 o Parse & manipulation

 Module

 Description

 Function Check_DNASequnece (Source_Sequence As String) As Boolean

 Check if 'Source_Sequence' is correct DNA sequence

It returns 'True' if the sequence is correct

 Function Generate_RandomSeq (Tot_Length As Long) As String

 Generate random DNA sequence

 Function Get_Complementary_Seq (Source_Seq As String) As String

 Get complementary DNA sequence

 Function Get_Reverse_Seq (Source_Seq As String) As String

 Get reverse DNA sequence

 Function Get_ReverseCompl_Seq(Source_Seq As String) As String

 Get reverse complementary DNA sequence

 Function Parse_AminoAcidSeq (Source_Seq As String) As String

 Parse 'Source_Seq' for amino acid sequence

 Function Parse_DNASeq (Source_Seq As String) As String

 Parse 'Source_Seq' for DNA sequence

 Function Translate_DNASeq (Source_Seq As String) As String

 Translate 'Source_Seq' to amino acid sequence

  

 o CAI (Codon adaptation index)

 Module

 Description

 Function Current_WTableFile () As String

 Return default 'Codon usage and w-table' file.

 * It is the 'Target codon usage and w-table' file shown in the 'Project' window

 Property Set_WTableFile () As String

 Set 'Codon usage and w-table file' for CAI calculation

 Function Calculate_CAI (Source_Seq As String, IsBulmerCorrection As Boolean) As Single

 Calculate the CAI (codon adaptation index) of 'Source_Seq'

 * Set 'Set_WTableFile' before calling this function

 

 o tAI (tRNA adaptation index)

 Module

 Description

 Function Calculate_tAI (Source_Seq As String, OrganismName As String) As Single

 If OrganismName doesn't match, it will return zero

 

o GC content

 Module

 Description

 Function Calculate_GC_Content (Source_Seq As String, IsInclude_1stPlace As Boolean, IsInclude_2ndPlace As Boolean, IsInclude_3rdPlace As Boolean, IsPercent As Boolean) As Single

 Calculate GC content with options.

 Function Calculate_GC_Content_1st (Source_Seq As String, IsPercent As Boolean) As Single

 Calculate GC content at 1st place

 Function Calculate_GC_Content_2nd (Source_Seq As String, IsPercent As Boolean) As Single

 Calculate GC content at 2nd place

 Function Calculate_GC_Content_3rd (Source_Seq As String, IsPercent As Boolean) As Single

 Calculate GC content at 3rd place

 Function Calculate_GC_Content_Overall (Source_Seq As String, IsPercent As Boolean) As Single

 Calculate overall GC content

 

o Codon usage count

 Module

 Description

 Sub Calculate_CodonUsage (Source_Seq As String)

 Calculate and build codon usage table

 Function Get_CodonUsage_Triplet (Type64_Address As Integer) As String

 Get codon (triplet name) in calculated codon usage table

 * Type64_Address is a number from 1 to 64 (see the 'Type64_Address' table)

 Function Get_CodonUsage_AminoAcid (Type64_Address As Integer) As String

 Get amino acid name in calculated codon usage table

 Function Get_CodonUsage_AminoAcidAbbrev (Type64_Address As Integer) As String

 Get short amino acid name (1 letter) in calculated codon usage table

 Function Get_CodonUsage_Fraction (Type64_Address As Integer) As Single

 Get fraction in calculated codon usage table

 * Fraction of synonymous codons of an amino acid

 Function Get_CodonUsage_RSCU (Type64_Address As Integer) As Single

 Get RSCU in calculated codon usage table

 * RSCU: relative synonymous codon usage among the synonymous codons of an amino acid

 Function Get_CodonUsage_Count (Type64_Address As Integer) As Integer

 Get count of codon in calculated codon usage table

 Function Get_CodonUsage_TableStr () As String

 Get the entire codon usage table in text format

 * First, call 'Calculate_CodonUsage'

* See the 'Type64_Address' table for the codon assigned to each address

 

o Nc (Effective number of codons)

 Module

 Description

 Function Calculate_Nc (Source_Seq As String, Calculate_CodonUsage As Boolean) As Single

 Calculate Nc

 * Calculate_CodonUsage is an optional parameter (default is True). If you have already called 'Calculate_CodonUsage' for this sequence, set the value to False to increase calculation speed.

 * Fuglsang's F3 correction will be used if a gene contains less than 4 isoleucines

 

o Codon usage and w-table

 Module

 Description

 Property Set_WTableFile () As String

 Set codon usage and w-table file for CAI calculation

 Function Get_CodonAddress_Of_Triplet (Source_Triplet As String) As Integer

 Get Codon address of triplet in Codon usage table

 Function Count_SynTriplet_Of_AminoAcidAbbrev (Source_AminoAcidAbbrev As String) As Integer

 Total number of synonymous codons of specified amino acid (abbreviated one letter name) in Codon usage table

 Function Get_SynTriplet_Of_AminoAcidAbbrev (Source_AminoAcidAbbrev As String,  NumberOfCodon As Integer) As String

 Get the n-th synonymous codon of the specified amino acid (abbreviated one letter name) in Codon usage table

 Function Get_Triplet_Abbrev(Source_Triplet As String) As String

 Get codon name (Abbreviated one letter name)

 Function Get_CodonAddress_Of_Triplet (Source_Triplet As String) As Integer

 Get codon address (1~64) in Codon usage table

 Function Get_Triplet_Fraction (Source_Triplet As String) As Single

 Get fraction of specified codon in Codon usage table

 Function Get_Triplet_Frequency (Source_Triplet As String) As Single

 Get frequency of specified codon in Codon usage table

 Function Get_Triplet_From_CodonAddress (Source_CodonAddress As Integer) As String

 Get triplet type of codon from specified codon address from 1 to 64

 Function Get_Triplet_RSCU (Source_Triplet As String) As Single

 Get RSCU of specified codon in Codon usage table

 Function Get_Triplet_RSCUmax (Source_Triplet As String) As Single

 Get RSCUmax of specified codon in Codon usage table

 Function Get_Triplet_Wi (Source_Triplet As String) As Single

 Get Wi of specified codon in Codon usage table

 

 o Restriction enzyme

 Module

 Description

 Function Get_ResEnz_Name (EnzymeIndex As Integer) As String

 Get restriction enzyme name

 Function Get_ResEnz_Seq (EnzymeIndex As Integer) As String

 Get restriction enzyme's DNA sequence

 Function Get_Seq_Of_ResEnz (Source_ResEnz As String) As String

 Get DNA sequence of restriction enzyme

 Function InternalCode_Check_RestrctionEnzyme (Source_Seq As String, Target_Seq As String) As String

 Return 'Not pass' if 'Source_Seq' contains target restriction enzyme name ('Target_Seq')

 Or it returns 'Pass'

 Property ResEnzCount () As Integer

 Get total number of registered restriction enzymes

 Function Translate_ResEnz (SourceStr As String) As String

 Convert restriction enzymes ('SourceStr') to DNA sequences

 

o Sequence search

 Module

 Description

 Function Get_SearchSequences_Str (Source_Seq As String, FindWhat_Seq As String) As String

 Search sequences and then return string expression

 * FindWhat_Seq: each sequence can be separated by tab,  carriage return (enter key), or comma (,)

 

 o Repeated sequence

 Module

 Description

 Function Get_RepeatedSeq_Str () As String

 Get a resultant string

 * Check 'Search_RepeatedSeq_Fwd' and 'Search_RepeatedSeq_BwdCmp' modules

 Function InternalCode_Check_RepeatedSequence (Source_Seq As String, Opt_MinSize As Integer, Opt_MaxSize As Integer, Opt_SmartFilter As Boolean, Opt_AllwableNumber As Integer)

 Return 'Not pass' if 'Source_Seq' contains repeated sequence more than 'Opt_AllowableNumber'

 Or it returns 'Pass'

 Property RepeatedSeq_Count () As Integer

 Get total number of repeated sequences

 * Check  'Search_RepeatedSeq_Fwd' and 'Search_RepeatedSeq_BwdCmp' modules

 Sub Search_RepeatedSeq_Fwd (Source_Seq As String , Min_Length As Integer, Max_Length As Integer, Ucase_Enabled As Boolean, SmartScan_Enalbed As Boolean)

 Search repeated sequence (DNA or Amino acid) forward direction

 Sub Search_RepeatedSeq_BwdCmp (Source_Seq As String , Min_Length As Integer, Max_Length As Integer, Ucase_Enabled As Boolean, SmartScan_Enalbed As Boolean)

 Search repeated sequence (DNA or Amino acid) backward direction

 

o Mismatched sequence

 Module

 Description

 Function Calculate_Mismatchedbp (Source_Seq1 As String, Source_Seq2 As String, IsPercent As Boolean) As Single

 Calculate sequence difference

 Function Compare_Mismatchedbp (Source_Seq1 As String, Source_Seq2 As String, Out_Str As String) As Integer

 Compare sequences and return the number of mismatched base pairs. It also writes the resulting alignment string to the 'Out_Str' variable

 

o Codon optimization

 Module

 Description

 Sub Do_CodonOpt_ProbabilityRandom_MakeLUT (CutOff_Fraction As Single)

 Build Look-Up-Table for codon optimization

 * CutOff_Fraction is not available

 Function Do_CodonOpt_ProbabilityRandom (Source_Seq As String, Max_Change As Integer) As String

 Do codon optimization

 It returns the optimized sequence

 Function RandomChoice (Percent_A As Single, Percent_C As Single, Percent_G As Single, Percent_T As Single) As String

 Random selection of single nucleotide among A, C, G, and T with given probability (percent)

 * It returns a single nucleotide.

 

o mRNA optimization

 Module

 Description

 Function Do_mRNAOpt_ProbabilityRandom (Source_Seq As String, AvgBaseGibbs_Low As Single, Optional AvgBaseGibbs_High As Single, Optional MaxMutation As Integer) As String

Adjust the mRNA Gibbs energy to be in the range specified by the lowest Gibbs energy (AvgBaseGibbs_Low) and the highest Gibbs energy (AvgBaseGibbs_High). It modifies the sequence by performing codon optimization or synonymous substitution.

MaxMutation: Optional. Total number of trials. Default value=50

AvgBaseGibbs_High: Optional. Default value=0

* It returns the optimized sequence

 Function Do_mRNALocalOpt (Source_Seq As String, StartLocation As Integer, EndLocation As Integer, AvgBaseGibbs_Low As Single, Optional AvgBaseGibbs_High As Single, Optional MaxMutation As Integer) As String

Slight modification of Do_mRNAOpt_ProbabilityRandom. A user can set the target range given by Start and End locations in a sequence.

AvgBaseGibbs_High: Optional. Default value=0

MaxMutation: Optional. Total number of trials. Default value=50

 Function Do_mRNAProfileOpt (Source_Seq As String, AvgBaseGibbs_Low As Single, Optional AvgBaseGibbs_High As Single, Optional WindowSize As Integer, Optional StepSize As Integer, Optional MaxMutation As Integer) As String

Improved from Do_mRNAOpt_ProbabilityRandom. It optimizes a mRNA Gibbs energy profile along a long sequence. A moving window method was adopted.

AvgBaseGibbs_High: Optional. Default value=0

WindowSize: Size of the moving window. Optional. Default value=60

StepSize: Interval between successive positions of the moving window. Optional. Default value=12

MaxMutation: Optional. Total number of trials. Default value=50

* The moving window method may not be perfect to screen out improper local sequences.

 Function Do_mRNALocalProfileOpt (Source_Seq As String, StartLocation As Integer, EndLocation As Integer, AvgBaseGibbs_Low As Single, Optional AvgBaseGibbs_High As Single, Optional WindowSize As Integer, Optional StepSize As Integer, Optional MaxMutation As Integer) As String

Modification of Do_mRNAProfileOpt. A user can set the target range given by Start and End locations in a sequence.

* The moving window method may not be perfect to screen out improper local sequences.

 

o Silent removal

 Module

 Description

 Function Silent_Removal_Once (FullSeq As String, ShortSeq As String, StartPosition As Integer) As Integer

 One time silent removal of ShortSeq in FullSeq

 StartPosition is optional parameter (Default 1)

 If not successful, return -1

 If not found, return 0

 If successful, return 1

 Property Get_LastResult_Silent_Removal_Once() As Integer

 Get the last result of Silent_Removal_Once

 * Read-only

 Function Silent_Removal_All (FullSeq As String, ShortSeq As String, TotalCycle As Integer) As Integer

 Silent removal of all found sequence (ShortSeq)

 TotalCycle is optional parameter (Default 100)

 If not successful or not perfect, return -1

 If not found, return 0

 If successful, return 1

 Property Get_LastResult_Silent_Removal_All() As Integer

 Get the last result of Silent_Removal_All

 * Read-only

 

o Excluded sequence

 Module

 Description

 Function InternalCode_Check_ExcludedSeq (Source_Seq As String, Target_Seq As String) As String

 Return 'Not pass' if 'Source_Seq' contains 'Target_Seq'.

 Or it returns 'Pass'

  

 
 

o Type64_Address

 

Type64_Address Codon
1 TTT
2 CTT
3 ATT
4 GTT
5 TCT
6 CCT
7 ACT
8 GCT
9 TAT
10 CAT
11 AAT
12 GAT
13 TGT
14 CGT
15 AGT
16 GGT
17 TTC
18 CTC
19 ATC
20 GTC
21 TCC
22 CCC
23 ACC
24 GCC
25 TAC
26 CAC
27 AAC
28 GAC
29 TGC
30 CGC
31 AGC
32 GGC
33 TTA
34 CTA
35 ATA
36 GTA
37 TCA
38 CCA
39 ACA
40 GCA
41 TAA
42 CAA
43 AAA
44 GAA
45 TGA
46 CGA
47 AGA
48 GGA
49 TTG
50 CTG
51 ATG
52 GTG
53 TCG
54 CCG
55 ACG
56 GCG
57 TAG
58 CAG
59 AAG
60 GAG
61 TGG
62 CGG
63 AGG
64 GGG
 
 

o Example 1

The following VBScript code shows the current default 'Codon usage and w-table' file

Function Main()

       ' Display the default 'Codon usage and w-table' file reported by GeneService.
       Dim wTableFile

       wTableFile = GeneService.Current_WTableFile

       MsgBox wTableFile

End Function

 

 

o Example 2

Calculate tAI (tRNA adaptation index) for given sequence

Function Main()

     ' Return the tAI of a short test sequence for the organism name "E. coli".
     ' Per the Calculate_tAI description, the result is zero when the organism name does not match.
     Dim tAI_Value

     tAI_Value = GeneService.Calculate_tAI("ATGGCTATGGAAGTGAAGAAGATCTTT","E. coli")

     Main = tAI_Value

End Function

 

o Example 3

Calculate codon usage and then copy to clipboard

Function Main()

     ' Build the codon usage table of the workspace sequence and copy it to the
     ' clipboard as text: one "Triplet<TAB>Count" row per codon address (1 to 64).
     Dim workspaceSeq, tableText, codonAddress, colSep, rowSep

     colSep = Chr(9)                  ' Tab between columns
     rowSep = Chr(13) + Chr(10)       ' CR+LF at the end of each row

     workspaceSeq = AppService.Workspace_Value

     ' The codon usage table must be calculated before the Get_CodonUsage_* calls below
     Call GeneService.Calculate_CodonUsage(workspaceSeq)

     ' Header row
     tableText = "Triplet" + colSep + "Count" + rowSep

     For codonAddress = 1 To 64

          tableText = tableText + GeneService.Get_CodonUsage_Triplet(codonAddress) + colSep _
                                + CStr(GeneService.Get_CodonUsage_Count(codonAddress)) + rowSep

     Next

     Call AppService.Clipboard_Copy(tableText)

     Call AppService.InstantMsg("Codon usage table was copied to clipboard")

End Function

 

o Example 4

Create CAI profile

Function Main()

     ' Create a CAI profile of the workspace sequence.
     ' CAI is calculated for a window of Window_Size bases that moves along the
     ' sequence in steps of Step_Size bases. Each value is plotted on the CustomUI
     ' canvas and the (position, CAI) pairs are collected as tab-separated text.

     '-------- Setting ------------------------------------------------------------

      Window_Size=60         'Length of partial test sequence

      Step_Size=3*3          'Interval between partial test sequences through entire sequence
                             'It should be a multiple of 3

      GeneService.Set_WTableFile=GeneService.Current_WTableFile

      '-----------------------------------------------------------------------------

      SourceSeq=AppService.Workspace_Value

      'Nothing to do for an empty workspace (also avoids division by zero below)
      If SourceSeq="" Then
              Exit Function
      End If

      SourceSeq_Length=Len(SourceSeq)

      Start_Position=1

      End_Position=SourceSeq_Length

      'A window starting at p covers p .. p+Window_Size-1, so the last full window
      'starts at End_Position-Window_Size+1 (not End_Position-Window_Size)
      Last_Window_Start=End_Position-Window_Size+1

      Buf_Str=""

     '**** Graphics **********************

     'The x-axis is drawn from x=30 to x=490; scale positions to that width so
     'points never run past the axis or the 500-pixel-wide canvas
     Plot_Left=30
     Plot_Width=490-Plot_Left

     CustomUI.Define_Canvas 500,200

     CustomUI.Form_BringToFront

     CustomUI.Clear_Canvas

     CustomUI.Form_Caption="CAI profile diagram"

     CustomUI.DrawLine 30,95,490,95,"LightGray"      'Horizontal guide line

     CustomUI.DrawLine 30,10,30,180,"Black"          'Y axis

     CustomUI.DrawLine 30,180,490,180,"Black"        'X axis

     CustomUI.DrawString 5,85,"CAI","Blue"

     CustomUI.DrawString 250,180,"Location","Red"

     CustomUI.DrawString 15,10,"1","Blue"

     CustomUI.DrawString 15,170,"0","Blue"

     CustomUI.Set_ForeColor_byName "Black"

      '*** Calculate CAI ************************************************

     For Current_Position=Start_Position To Last_Window_Start Step Step_Size

                AppService.InstantMsg "Calculating CAI at " + CStr(Current_Position)

                TestSeq=Mid(SourceSeq,Current_Position,Window_Size)

                'Second argument is IsBulmerCorrection
                Local_CAI= GeneService.Calculate_CAI(TestSeq,True)

                'Map position to x and CAI to y (CAI=0 -> y=180, CAI=1 -> y=20)
                X2=(Current_Position-Start_Position)/SourceSeq_Length*Plot_Width+Plot_Left

                Y2=170-(Local_CAI*160)+10

                'From the second point on, connect to the previous point
                If Current_Position>Start_Position Then

                     CustomUI.DrawLine X1,Y1,X2,Y2

                     CustomUI.DrawRectangle X2-1,Y2-1,2,2

                     CustomUI.Update_Canvas

                End If

                'Remember this point as the start of the next line segment
                X1=X2

                Y1=Y2

                Buf_Str=Buf_Str + CStr(Current_Position) + Chr(9) + CStr(Local_CAI) + Chr(13) + Chr(10)

     Next

     'Hand the collected "position<TAB>CAI" rows to the form
     'NOTE(review): exact effect of RawData / Make_Clone is defined by CustomUI - confirm in its documentation
     CustomUI.RawData=Buf_Str

     CustomUI.Make_Clone

End Function