Attachment 'step_8.py' of Events/GCC2013/TrainingDay/API

#!/usr/bin/env python

"""
Step 8
======
In this step, we'll do everything from steps 1 through 7 - then,
when the workflow finishes, we'll check a particular dataset by name
for statistics on how the workflow did.

The output will be in a new history named 'Step 8'.
"""
import os
import sys
import pprint
import time

import users_1
import histories_3
import tools_1
import hdas_2
import workflows_1

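# (the five modules above are the helper scripts written in the earlier steps -
#   they're all attached to this page)
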
# we'll be using the get_dataset_column function from this to get the statistics
import datasets_3

NEW_HISTORY_NAME = 'Step 8'
DATA_TO_UPLOAD = 'data/myIlluminaRun.solexa.fastq'

SOLEXA_QC_WORKFLOW_NAME = 'Joined Solexa QC'
# the workflow step that the input dataset will be plugged into
SOLEXA_QC_WORKFLOW_INPUT_STEP = 6

# here's the name of the dataset containing the statistics - kinda silly in this case
STATISTICS_DATASET_NAME = 'statistics'
STATISTICS_DATASET_COLUMN = 5

# ----------------------------------------------------------------------------- main
if __name__ == '__main__':
    try:
        # check the connection
        users = users_1.get_users()

        # create a new history
        new_history = histories_3.create_history( NEW_HISTORY_NAME )
        print 'created history!', new_history[ 'name' ]
        new_history_id = new_history[ 'id' ]
        new_history_details = histories_3.get_history( new_history_id )

        # upload a file
        tool_output_datasets = tools_1.upload_hda( new_history_id, DATA_TO_UPLOAD )
        uploaded_file_data = tool_output_datasets[ 'outputs' ][0]
        print 'uploaded hda!', uploaded_file_data[ 'name' ]
        uploaded_file_id = uploaded_file_data[ 'id' ]

        # and use get_hda to get details on the new, uploaded HDA
        uploaded_hda_details = hdas_2.get_hda( new_history_id, uploaded_file_id )
        uploaded_hda_state = uploaded_hda_details[ 'state' ]

        # wait for the upload to finish
        while uploaded_hda_state != 'ok':
            print '\t uploaded_hda_state:', uploaded_hda_state
            print '\t (waiting 4 seconds...)'
            time.sleep( 4.0 )

            # keep checking to get any new state the HDA might move into
            uploaded_hda_details = hdas_2.get_hda( new_history_id, uploaded_file_id )
            uploaded_hda_state = uploaded_hda_details[ 'state' ]

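        # NOTE: the loop above assumes the upload eventually reaches 'ok'. A dataset
        #   that fails will stay in the 'error' state forever, so a real script would
        #   probably want to treat that as terminal - something like:
        #if uploaded_hda_state == 'error':
        #    raise Exception( 'upload failed: ' + uploaded_hda_details[ 'name' ] )
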
        # here's the new stuff...moving fast now

        # get the info on all the workflows available to us
        all_workflows = workflows_1.get_workflows()

        # search that info for the name of the workflow we want
        found_workflow = None
        for workflow in all_workflows:
            if workflow[ 'name' ] == SOLEXA_QC_WORKFLOW_NAME:
                found_workflow = workflow

        if not found_workflow:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )

        print 'found workflow!', found_workflow[ 'name' ]
        target_workflow_id = found_workflow[ 'id' ]
        target_workflow_details = workflows_1.get_workflow( target_workflow_id )

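        # (if you're curious what a workflow looks like on the inside, you can
        #   pretty-print the details dictionary - commented out here to keep the
        #   output short)
        #pprint.pprint( target_workflow_details )
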
        # now we'll run it:
        print 'running', found_workflow[ 'name' ], 'workflow...'
        workflow_output = workflows_1.run_single_input_workflow_on_hda( target_workflow_id,
            new_history_id, uploaded_file_id, SOLEXA_QC_WORKFLOW_INPUT_STEP )
        print 'workflow started!'

        # the 'outputs' entry of the workflow_output dictionary is a list of the ids
        #   of the HDAs the workflow creates
        output_hda_ids = workflow_output[ 'outputs' ]

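        # (for reference, workflow_output contains at least that 'outputs' list -
        #   something like { ..., 'outputs' : [ '81bb67c12bbc1d08', ... ] },
        #   where the id shown is just a made-up example)
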
        # wait for them all to finish
        for hda_id in output_hda_ids:
            workflow_hda_details = hdas_2.get_hda( new_history_id, hda_id )
            workflow_hda_state = workflow_hda_details[ 'state' ]
            workflow_hda_name = workflow_hda_details[ 'name' ]
            print workflow_hda_name

            while workflow_hda_state != 'ok':
                print '\t state:', workflow_hda_state
                print '\t (waiting 4 seconds...)'
                time.sleep( 4.0 )

                # keep checking to get any new state the HDA might move into
                workflow_hda_details = hdas_2.get_hda( new_history_id, hda_id )
                workflow_hda_state = workflow_hda_details[ 'state' ]

            print '\t ok'
        print 'workflow complete!'

        # get some statistics using the datasets API
        # we'll need to find the proper HDA using its name - we can use the same
        #   pattern we used to find the workflow
        found_statistics = None
        hda_summaries = hdas_2.get_hdas( new_history_id )
        for hda_summary in hda_summaries:
            if hda_summary[ 'name' ] == STATISTICS_DATASET_NAME:
                found_statistics = hda_summary
                # (we can use 'break' to exit the loop early)
                break
        if not found_statistics:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )
        statistics_dataset_id = found_statistics[ 'id' ]

        # now we'll use the datasets API to get some actual data from *inside* the dataset's file
        # in this case, we're interested in column 5 - the per-base quality score across all the reads
        column_data = datasets_3.get_dataset_column( statistics_dataset_id, STATISTICS_DATASET_COLUMN )
        # get_dataset_column produces both raw data (in 'data') and some rough statistics of its own
        #   we'll grab the mean and median per-base quality for this fastq
        # there will be metadata for each column - but we only requested one column,
        #   so we'll take the first metadata object
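        # (as an illustration only, the returned dictionary looks roughly like:
        #   { 'data' : [ [ 33.2 ], [ 32.8 ], ... ],
        #     'meta' : [ { 'mean' : 32.9, 'median' : 33.0, ... } ] }
        #   where the numbers here are made up)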
        column_metadata = column_data[ 'meta' ][0]
        mean_perbase_quality = column_metadata[ 'mean' ]
        median_perbase_quality = column_metadata[ 'median' ]

        # now we can do intelligent things based on the stats:
        #   - like add the fastq to a library or request a re-run of the sequencing (and make the lab techs mad)
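        # for example, a simple quality gate might look like this (the threshold
        #   below is made up purely for illustration):
        #MINIMUM_MEAN_QUALITY = 20.0
        #if mean_perbase_quality < MINIMUM_MEAN_QUALITY:
        #    print 'mean per-base quality is below', MINIMUM_MEAN_QUALITY, '- consider a re-run'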

    except Exception, exc:
        # note: this catches errors from any of the steps above, not just the statistics
        print 'Error in step 8:', str( exc )
        sys.exit( 1 )

    print 'mean:', mean_perbase_quality, 'median:', median_perbase_quality

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use a direct download URL, since it is subject to change and can break easily.
  • [[attachment:Galaxy-Workflow-Joined_Solexa_QC.ga]] (8.2 KB, 2013-06-24)
  • [[attachment:all-scripts.tar.gz]] (14.0 KB, 2013-06-24)
  • [[attachment:common.py]] (5.2 KB, 2013-06-24)
  • [[attachment:datasets_1.py]] (0.9 KB, 2013-06-24)
  • [[attachment:datasets_2.py]] (1.2 KB, 2013-06-24)
  • [[attachment:datasets_3.py]] (3.4 KB, 2013-06-24)
  • [[attachment:hdas_1.py]] (1.5 KB, 2013-06-24)
  • [[attachment:hdas_2.py]] (1.7 KB, 2013-06-24)
  • [[attachment:hdas_3.py]] (2.2 KB, 2013-06-24)
  • [[attachment:histories_1.py]] (0.9 KB, 2013-06-24)
  • [[attachment:histories_2.py]] (2.1 KB, 2013-06-24)
  • [[attachment:histories_3.py]] (2.2 KB, 2013-06-24)
  • [[attachment:lddas_1.py]] (3.3 KB, 2013-06-24)
  • [[attachment:libraries_1.py]] (1.3 KB, 2013-06-24)
  • [[attachment:setup.py]] (0.8 KB, 2013-06-24)
  • [[attachment:step_1.py]] (1.1 KB, 2013-06-24)
  • [[attachment:step_10.py]] (9.9 KB, 2013-06-24)
  • [[attachment:step_2.py]] (1.4 KB, 2013-06-24)
  • [[attachment:step_3.py]] (1.6 KB, 2013-06-24)
  • [[attachment:step_4.py]] (1.1 KB, 2013-06-24)
  • [[attachment:step_5.py]] (1.9 KB, 2013-06-24)
  • [[attachment:step_6.py]] (2.5 KB, 2013-06-24)
  • [[attachment:step_7.py]] (4.9 KB, 2013-06-24)
  • [[attachment:step_8.py]] (5.9 KB, 2013-06-24)
  • [[attachment:step_9.py]] (8.0 KB, 2013-06-30)
  • [[attachment:tools_1.py]] (1.8 KB, 2013-06-24)
  • [[attachment:upload_to_history.py]] (2.0 KB, 2013-06-24)
  • [[attachment:users_1.py]] (1.3 KB, 2013-06-24)
  • [[attachment:workflows_1.py]] (2.9 KB, 2013-06-24)