#!/usr/bin/env python
import pandas as pd
import sys
# Name of the TSV file written to the current working directory.
merged_tables_file = 'merged_tables.tsv'

# Column names assigned to the two (header-less) tab-separated input tables.
PHOBIUS_COLUMNS = ['SEQ_ID', 'TM', 'SP', 'PREDICTION']
PROTEIN_COLUMNS = ['SEQ_HEADER', 'SEQ']

# Columns written to the output file, in this order.
OUTPUT_COLUMNS = ['SEQ_ID', 'DESCRIPTION', 'TM', 'SP', 'PREDICTION', 'SEQ']

# Rows whose TM value is below this threshold are dropped from the output.
MIN_TM = 5


def read_phobius(path):
    """Read the tab-separated Phobius table at *path* into a DataFrame.

    SEQ_ID is forced to ``str`` so that purely numeric identifiers are not
    parsed as integers, which would make the merge on SEQ_ID fail because
    the protein-side identifiers are always strings.
    """
    return pd.read_table(
        path, sep='\t', names=PHOBIUS_COLUMNS, dtype={'SEQ_ID': str}
    )


def add_header_fields(proteins_df):
    """Add SEQ_ID and DESCRIPTION columns derived from SEQ_HEADER.

    The header is split at its first space: the text before it becomes
    SEQ_ID and everything after it becomes DESCRIPTION (an empty string
    when the header contains no space).

    Args:
        proteins_df: DataFrame with a string-valued 'SEQ_HEADER' column.

    Returns:
        The same DataFrame object, with the two columns added in place.
    """
    # partition() yields three columns: before, separator, after.
    parts = proteins_df['SEQ_HEADER'].str.partition(' ')
    proteins_df['SEQ_ID'] = parts[0]
    proteins_df['DESCRIPTION'] = parts[2]
    return proteins_df


def read_proteins(path):
    """Read the tab-separated protein table at *path* and split its headers.

    SEQ_HEADER is forced to ``str`` so that a header consisting only of
    digits is still treated as text when it is split.
    """
    proteins_df = pd.read_table(
        path, sep='\t', names=PROTEIN_COLUMNS, dtype={'SEQ_HEADER': str}
    )
    return add_header_fields(proteins_df)


def merge_tables(phobius_df, proteins_df, min_tm=MIN_TM):
    """Left-join the Phobius rows with the protein rows and filter on TM.

    Args:
        phobius_df: DataFrame with at least 'SEQ_ID' and 'TM' columns.
        proteins_df: DataFrame with a 'SEQ_ID' column to join on.
        min_tm: Minimum TM value (inclusive) for a row to be kept.

    Returns:
        The merged rows with ``TM >= min_tm``. Phobius rows without a
        matching protein are kept, with missing values in the protein
        columns.
    """
    # Merge first, filter second: the surviving rows keep their position in
    # the full merged table as index, and that index is written to the file.
    merged_df = phobius_df.merge(proteins_df, on='SEQ_ID', how='left')
    return merged_df.loc[merged_df['TM'] >= min_tm]


def main(argv=None):
    """Merge the two tables named on the command line into one TSV file.

    Args:
        argv: Argument vector in ``sys.argv`` layout (program name first);
            defaults to ``sys.argv``. Arguments after the second path are
            ignored.

    Raises:
        SystemExit: If fewer than two table paths are supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit('usage: {} PHOBIUS_TABLE PROTEINS_TABLE'.format(argv[0]))

    merged_df = merge_tables(read_phobius(argv[1]), read_proteins(argv[2]))
    # NOTE: the DataFrame index is written as the first, unnamed column.
    # This is kept because consumers of the file may rely on the layout.
    merged_df.to_csv(merged_tables_file, sep='\t', columns=OUTPUT_COLUMNS)


if __name__ == '__main__':
    main()