
attachment:step_9.py of Events/GCC2013/TrainingDay/API

Attachment 'step_9.py'


#!/usr/bin/env python

"""
Step 9
======
In this step, we'll do everything from steps 1 through 8 - then,
when we have quality stats, we'll rename and annotate the HDAs containing the
forward and reverse reads.

The output will be in a new history named 'Step 9'.
"""
import os
import sys
import time

import users_1
import histories_3
import tools_1
import workflows_1
import datasets_3

# we'll use the latest hdas module in order to get the update_hda function
#import hdas_2
import hdas_3
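# (for reference: in the Galaxy API, renaming/annotating an HDA is an HTTP PUT
#   to /api/histories/<history_id>/contents/<hda_id> with a JSON body of the
#   changed fields - hdas_3.update_hda is assumed to wrap that call; check
#   hdas_3.py for the details)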


NEW_HISTORY_NAME = 'Step 9'
DATA_TO_UPLOAD = 'data/myIlluminaRun.solexa.fastq'

SOLEXA_QC_WORKFLOW_NAME = 'Joined Solexa QC'
SOLEXA_QC_WORKFLOW_INPUT_STEP = 6

# the name of the dataset containing the statistics - and the column we want stats from
STATISTICS_DATASET_NAME = 'statistics'
STATISTICS_DATASET_COLUMN = 5

# the names of the datasets containing the forward and reverse reads
FREAD_DATASET_NAME = 'forward reads'
RREAD_DATASET_NAME = 'reverse reads'
# ----------------------------------------------------------------------------- main
if __name__ == '__main__':
    try:
        # check the connection
        users = users_1.get_users()

        # create a new history
        new_history = histories_3.create_history( NEW_HISTORY_NAME )
        print 'created history!', new_history[ 'name' ]
        new_history_id = new_history[ 'id' ]
        new_history_details = histories_3.get_history( new_history_id )
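        # (new_history_details isn't actually used below - presumably it's fetched
        #   here to show that get_history returns a more detailed view than the
        #   summary dictionary create_history gives back)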

        # upload a file
        tool_output_datasets = tools_1.upload_hda( new_history_id, DATA_TO_UPLOAD )
        uploaded_file_data = tool_output_datasets[ 'outputs' ][0]
        print 'uploaded hda!', uploaded_file_data[ 'name' ]
        uploaded_file_id = uploaded_file_data[ 'id' ]

        # and use get_hda to get details on the new, uploaded HDA
        uploaded_hda_details = hdas_3.get_hda( new_history_id, uploaded_file_id )
                               # ^^^ --------------------------------------- we're using a new version
                               #                                             - don't forget to change things like this
        uploaded_hda_state = uploaded_hda_details[ 'state' ]

        # wait for the upload to finish
        while uploaded_hda_state != 'ok':
            print '\t uploaded_hda_state:', uploaded_hda_state
            print '\t (waiting 4 seconds...)'
            time.sleep( 4.0 )

            # keep checking to get any new state the HDA might move into
            uploaded_hda_details = hdas_3.get_hda( new_history_id, uploaded_file_id )
            uploaded_hda_state = uploaded_hda_details[ 'state' ]
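        # (note: a more defensive script would also watch for terminal failure
        #   states here - e.g. 'error' - and bail out instead of polling forever;
        #   we keep the loop simple for the workshop)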

        # here's the new stuff...moving fast now

        # get the info of all the workflows available to us
        all_workflows = workflows_1.get_workflows()

        # let's search that info for the name of the workflow we want
        found_workflow = None
        for workflow in all_workflows:
            if workflow[ 'name' ] == SOLEXA_QC_WORKFLOW_NAME:
                found_workflow = workflow
                break

        if not found_workflow:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )

        print 'found workflow!', found_workflow[ 'name' ]
        target_workflow_id = found_workflow[ 'id' ]
        target_workflow_details = workflows_1.get_workflow( target_workflow_id )

        # now we'll run it:
        print 'running', found_workflow[ 'name' ], 'workflow...'
        workflow_output = workflows_1.run_single_input_workflow_on_hda( target_workflow_id,
            new_history_id, uploaded_file_id, SOLEXA_QC_WORKFLOW_INPUT_STEP )
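        # (run_single_input_workflow_on_hda is assumed to POST to /api/workflows
        #   with a payload that maps our uploaded HDA onto the workflow's input
        #   step, roughly:
        #     { 'workflow_id' : target_workflow_id,
        #       'history'     : 'hist_id=' + new_history_id,
        #       'ds_map'      : { str( SOLEXA_QC_WORKFLOW_INPUT_STEP ) :
        #                         { 'src' : 'hda', 'id' : uploaded_file_id } } }
        #   see workflows_1.py for what's actually sent)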
        print 'workflow started!'

        # the 'outputs' list in the workflow_output dictionary contains the ids of the HDAs the workflow creates
        output_hda_ids = workflow_output[ 'outputs' ]
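        # (workflow_output is assumed to look roughly like
        #   { 'history' : <history id>, 'outputs' : [ <hda id>, ... ] })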

        # wait for them all to finish
        for hda_id in output_hda_ids:
            workflow_hda_details = hdas_3.get_hda( new_history_id, hda_id )
            workflow_hda_state = workflow_hda_details[ 'state' ]
            workflow_hda_name = workflow_hda_details[ 'name' ]
            print workflow_hda_name

            while workflow_hda_state != 'ok':
                print '\t state:', workflow_hda_state
                print '\t (waiting 4 seconds...)'
                time.sleep( 4.0 )

                # keep checking to get any new state the HDA might move into
                workflow_hda_details = hdas_3.get_hda( new_history_id, hda_id )
                workflow_hda_state = workflow_hda_details[ 'state' ]

            print '\t ok'
        print 'workflow complete!'

        # get some statistics using the datasets api
        found_statistics = None
        hda_summaries = hdas_3.get_hdas( new_history_id )
        for hda_summary in hda_summaries:
            if hda_summary[ 'name' ] == STATISTICS_DATASET_NAME:
                found_statistics = hda_summary
                break
        if not found_statistics:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )
        statistics_dataset_id = found_statistics[ 'id' ]

        column_data = datasets_3.get_dataset_column( statistics_dataset_id, STATISTICS_DATASET_COLUMN )
        column_metadata = column_data[ 'meta' ][0]
        mean_perbase_quality = column_metadata[ 'mean' ]
        median_perbase_quality = column_metadata[ 'median' ]
        print 'mean:', mean_perbase_quality, 'median:', median_perbase_quality
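        # (column_data is assumed to look roughly like
        #   { 'meta' : [ { 'mean' : 32.4, 'median' : 33.0, ... } ] } -
        #   one metadata dictionary per requested column; the numbers here are
        #   made up - see datasets_3.py for the actual request)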

        # now we'll rename and annotate the split forward and reverse reads
        # we'll need to find them first - a familiar pattern by now
        forward_reads_hda = None
        reverse_reads_hda = None
        # we've already got a list of HDA summaries from the statistics step above
        for hda_summary in hda_summaries:
            if   hda_summary[ 'name' ] == FREAD_DATASET_NAME:
                forward_reads_hda = hda_summary
            elif hda_summary[ 'name' ] == RREAD_DATASET_NAME:
                reverse_reads_hda = hda_summary
        if not forward_reads_hda or not reverse_reads_hda:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )
        print 'found fwd/rev reads:', forward_reads_hda[ 'name' ], reverse_reads_hda[ 'name' ]

        # new stuff: let's update the name and annotation of each file using the update_hda API call

        # first, we'll use the leading part of the uploaded filename in the new names
        # (this would be a good place to customize what you grab from the filename to make it the HDA name)
        uploaded_basename = os.path.basename( DATA_TO_UPLOAD )
        uploaded_prefix = uploaded_basename.split( '.' )[0]
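        # (for the DATA_TO_UPLOAD above, uploaded_prefix will be 'myIlluminaRun')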
        annotation_string  = 'mean perbase quality: ' + str( mean_perbase_quality ) + '; '
        annotation_string += 'median perbase quality: ' + str( median_perbase_quality ) + '; '

        forward_reads_hda_id = forward_reads_hda[ 'id' ]
        hdas_3.update_hda( new_history_id, forward_reads_hda_id, {
            'name'          : uploaded_prefix + '.fwd.fastqsanger',
            'annotation'    : annotation_string
        })

        reverse_reads_hda_id = reverse_reads_hda[ 'id' ]
        # the annotation is the same, but note the 'rev' in the name to indicate reverse reads
        hdas_3.update_hda( new_history_id, reverse_reads_hda_id, {
            'name'          : uploaded_prefix + '.rev.fastqsanger',
            'annotation'    : annotation_string
        })
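        # since only the 'fwd'/'rev' part of the name differs, the two updates
        # above could equally be written as a loop:
        #   for hda_id, direction in [ ( forward_reads_hda_id, 'fwd' ),
        #                              ( reverse_reads_hda_id, 'rev' ) ]:
        #       hdas_3.update_hda( new_history_id, hda_id, {
        #           'name'       : uploaded_prefix + '.' + direction + '.fastqsanger',
        #           'annotation' : annotation_string
        #       })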

        # let's make sure that worked by getting the details for these two HDAs
        forward_reads_hda_details = hdas_3.get_hda( new_history_id, forward_reads_hda_id )
        forward_reads_hda_name = forward_reads_hda_details[ 'name' ]
        reverse_reads_hda_details = hdas_3.get_hda( new_history_id, reverse_reads_hda_id )
        reverse_reads_hda_name = reverse_reads_hda_details[ 'name' ]
        # (currently the hdas.show API doesn't return the annotations: you'll have to double check
        #   that in the browser for now)

    except Exception as exc:
        print 'Error in step 9:', str( exc )
        sys.exit( 1 )

    print 'Forward:', forward_reads_hda_name
    print 'Reverse:', reverse_reads_hda_name

Attached Files

  • [[attachment:Galaxy-Workflow-Joined_Solexa_QC.ga]] (2013-06-24 16:01:09, 8.2 KB)
  • [[attachment:all-scripts.tar.gz]] (2013-06-24 16:46:36, 14.0 KB)
  • [[attachment:common.py]] (2013-06-24 16:01:23, 5.2 KB)
  • [[attachment:datasets_1.py]] (2013-06-24 16:04:07, 0.9 KB)
  • [[attachment:datasets_2.py]] (2013-06-24 16:04:15, 1.2 KB)
  • [[attachment:datasets_3.py]] (2013-06-24 16:04:22, 3.4 KB)
  • [[attachment:hdas_1.py]] (2013-06-24 16:04:33, 1.5 KB)
  • [[attachment:hdas_2.py]] (2013-06-24 16:04:50, 1.7 KB)
  • [[attachment:hdas_3.py]] (2013-06-24 16:05:07, 2.2 KB)
  • [[attachment:histories_1.py]] (2013-06-24 16:03:38, 0.9 KB)
  • [[attachment:histories_2.py]] (2013-06-24 16:03:52, 2.1 KB)
  • [[attachment:histories_3.py]] (2013-06-24 16:05:25, 2.2 KB)
  • [[attachment:lddas_1.py]] (2013-06-24 16:05:34, 3.3 KB)
  • [[attachment:libraries_1.py]] (2013-06-24 16:05:41, 1.3 KB)
  • [[attachment:setup.py]] (2013-06-24 16:01:36, 0.8 KB)
  • [[attachment:step_1.py]] (2013-06-24 16:02:45, 1.1 KB)
  • [[attachment:step_10.py]] (2013-06-24 16:07:15, 9.9 KB)
  • [[attachment:step_2.py]] (2013-06-24 16:03:28, 1.4 KB)
  • [[attachment:step_3.py]] (2013-06-24 16:06:01, 1.6 KB)
  • [[attachment:step_4.py]] (2013-06-24 16:06:11, 1.1 KB)
  • [[attachment:step_5.py]] (2013-06-24 16:06:20, 1.9 KB)
  • [[attachment:step_6.py]] (2013-06-24 16:06:44, 2.5 KB)
  • [[attachment:step_7.py]] (2013-06-24 16:06:53, 4.9 KB)
  • [[attachment:step_8.py]] (2013-06-24 16:07:05, 5.9 KB)
  • [[attachment:step_9.py]] (2013-06-30 13:51:46, 8.0 KB)
  • [[attachment:tools_1.py]] (2013-06-24 16:41:51, 1.8 KB)
  • [[attachment:upload_to_history.py]] (2013-06-24 16:44:32, 2.0 KB)
  • [[attachment:users_1.py]] (2013-06-24 16:03:03, 1.3 KB)
  • [[attachment:workflows_1.py]] (2013-06-24 16:44:44, 2.9 KB)
