In [1]:
from specio import specread
from os import path
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
import re

sparql = SPARQLWrapper("https://opendata1.opendata.u-psud.fr/sparql/")
sparql.setQuery("""

prefix projectInvivo: <http://invivo.lipsys2.u-psud.fr/wiki/Data:Analyse_des_macrophages#>
prefix daapp: <http://daap.eu/wiki/Data:Project#> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix vcard: <http://www.w3.org/2006/vcard/ns#>

select DISTINCT  ?picture
where {
  <http://daap.eu/wiki/Analyse_des_macrophages_Méthode_2_Campagne_2_:_2017> 
  projectInvivo:hasSlide  ?slide .
    ?slide projectInvivo:hasPicture ?picture .
} 

""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

filesNAS = [result["picture"]["value"]  for result in results["results"]["bindings"]]
filenames = sorted([re.sub('file://filer.ups.u-psud.fr/archivagesciences\$/Chimie%20Analytique/projets/Macrophages_imagerie',
                    '/home/jupyter/Notebooks/htdocs/Macrophages_imagerie', file) for file in filesNAS])
print("\n".join(filenames))
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L03.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419EPA_L03.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419STD_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EDL_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EPA_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420LDL_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EDL_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EPA_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421LDL_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170422LDL_L03.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170424EDL_L03.fsm
In [3]:
# OLD par fichier
# DATA_PATH = ("/home/jupyter/Notebooks/Macrophages_imagerie/Methode_02/Campagne_02")
# EXT_FILE = "*.fsm"
# #filenames = sorted(glob.glob(path.join(DATA_PATH, EXT_FILE)))
# filenames = glob.glob(path.join(DATA_PATH, EXT_FILE))
In [4]:
print("\n".join(filenames))
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L03.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419EPA_L03.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419STD_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EDL_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EPA_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420LDL_L01.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EDL_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EPA_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421LDL_L02.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170422LDL_L03.fsm
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170424EDL_L03.fsm
In [5]:
# "/home/jupyter/Notebooks/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm" == "/home/jupyter/Notebooks/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm"
In [6]:
# i = 0
# fsort1 = sorted(filenames)
# fsort2 = sorted(filenames2)

# for f in fsort1:    
#     print(fsort1[i])
#     print(fsort2[i])
#     print(fsort1[i] == fsort2[i])
#     i = i + 1
In [7]:
list_spec = {}
for f in filenames:
    list_spec[f] = specread(f)
In [8]:
print(len(list_spec))
12
In [9]:
cell_medium = ["EDL", "LDL", "STD", "EPA"]
dict_ir = {k: [] for k in cell_medium}
print(dict_ir)
{'EDL': [], 'LDL': [], 'STD': [], 'EPA': []}
In [10]:
#KR
for key in dict_ir.keys():
    for f in filenames:
        if key in f:
             dict_ir[key].append(list_spec[f])
In [11]:
#OLD VERSION

# for key in dict_ir.keys():
#     for f in filenames:
#         if key in f:
#             dict_ir[key].append(specread(f))
In [12]:
print(dict_ir)
{'EDL': [Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[102.41041  102.50108  102.6334   ... 117.68095  124.63407  126.097404]
 [102.1833   102.29499  102.5289   ... 129.85854  138.07028  127.65028 ]
 [102.0728   102.15745  102.36862  ...  97.370964  96.82941   95.83801 ]
 ...
 [101.20574  101.14632  101.353165 ...  96.523544 110.76741  124.48135 ]
 [102.002625 102.18558  102.52742  ... 107.94787  121.876335 141.33583 ]
 [102.98535  103.009186 103.113174 ...  98.65173  115.48307  134.8786  ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170420EDL_L01', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 1109.5000233451517, 'y_init': -137.49998758236615, 'z_init': 4000.0, 'n_x': 189, 'n_y': 101, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Thu Apr 20 08:46:14 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170420EDL_L01.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[ 99.4711    99.47538   99.705635 ...  80.75171   87.11026   91.1909  ]
 [101.53501  101.57827  101.71788  ... 128.05081  143.01501  141.09897 ]
 [104.15017  103.99866  104.21387  ... 115.34351  109.42525  102.67608 ]
 ...
 [  0.         0.         0.       ...   0.         0.         0.      ]
 [  0.         0.         0.       ...   0.         0.         0.      ]
 [  0.         0.         0.       ...   0.         0.         0.      ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170421EDL_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 1308.2500186264508, 'y_init': 58.25001167257584, 'z_init': 4000.0, 'n_x': 151, 'n_y': 95, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Fri Apr 21 13:01:10 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170421EDL_L02.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[ 99.18622   99.17415   98.970215 ...  89.40593   98.58076  100.90654 ]
 [ 99.61621   99.58943   99.66678  ... 100.6327    92.2831    86.76073 ]
 [100.368095 100.36098  100.24655  ... 108.87523  107.81979  100.7395  ]
 ...
 [102.59632  102.52495  102.4322   ... 109.580574  95.95844   89.17854 ]
 [102.71568  102.57474  102.39938  ... 134.68396  140.88217  134.755   ]
 [102.33903  102.52533  102.37818  ... 102.41948  110.08465  114.507416]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170424EDL_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 4327.750023096799, 'y_init': 3063.6250152736893, 'z_init': 4000.0, 'n_x': 187, 'n_y': 124, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Mon Apr 24 10:21:44 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170424EDL_L03.fsm'} 
], 'LDL': [Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[ 96.76078   96.78547   96.85925  ...  89.256004  85.60106   99.70127 ]
 [ 96.76285   96.898605  97.187126 ...  92.758514 110.21162  122.25818 ]
 [ 97.85429   97.894966  98.22846  ...  99.75266  105.9302   110.99608 ]
 ...
 [ 94.66745   94.7308    94.83478  ...  90.19859   98.231445 122.904625]
 [ 93.45002   93.701256  93.778564 ...  88.477104  83.776886  94.74307 ]
 [ 90.18729   90.25069   90.468185 ...  97.729935 112.131355 126.26607 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170420LDL_L01', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 2738.625020240743, 'y_init': 4659.625017260511, 'z_init': 4000.0, 'n_x': 164, 'n_y': 140, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Thu Apr 20 12:54:10 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170420LDL_L01.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[ 91.264046    91.165306    91.232056   ...  98.369286   112.256485
  125.01442   ]
 [ 92.64506     92.759186    92.896645   ... 118.65007    126.823395
  137.74821   ]
 [ 90.88955     90.7332      90.76602    ... 106.29597    108.50493
  115.7798    ]
 ...
 [  0.35872427   0.3458393    0.36662653 ...  23.380354    31.088797
   38.938744  ]
 [  0.24737059   0.27054384   0.27587673 ...  19.26295     22.034353
   30.822155  ]
 [  0.28510052   0.30762413   0.27539393 ...  26.240295    31.464735
   36.443123  ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170421LDL_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 4365.750022103389, 'y_init': -89.62498249113624, 'z_init': 4000.0, 'n_x': 179, 'n_y': 142, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Fri Apr 21 17:24:12 2017', 'image_name': '', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170421LDL_L02.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[102.06887  102.015755 101.84997  ... 101.92222  118.8075   135.4712  ]
 [102.23148  101.99641  101.785484 ... 118.73554  108.62514   92.96961 ]
 [102.371765 102.30904  102.116    ... 125.76474  135.4335   150.37312 ]
 ...
 [ 51.392715  51.459137  51.29502  ...  63.80772   68.654015  72.258995]
 [ 53.856514  53.817226  53.74031  ...  69.5356    63.362663  59.94399 ]
 [ 50.831997  50.748993  50.635654 ...  68.99248   70.20383   73.61084 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170422LDL_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 7282.250035514433, 'y_init': 327.87501303851553, 'z_init': 4000.0, 'n_x': 287, 'n_y': 106, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Sat Apr 22 16:57:27 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170422LDL_L03.fsm'} 
], 'STD': [Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[100.56563  100.43746  100.655556 ... 113.87019  130.67267  130.12047 ]
 [100.524345 100.473595 100.56737  ... 106.732666 122.84013  131.91829 ]
 [100.919174 100.96554  101.06892  ... 117.11189  118.59732  112.632675]
 ...
 [ 87.24211   86.936226  86.670456 ... 107.68202  119.20983  132.96117 ]
 [ 87.515755  87.62498   87.95406  ...  74.24934   82.53994  103.41158 ]
 [ 87.020775  87.24106   87.68801  ...  78.700356  94.35523  113.71412 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': 'Image', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': -2266.249990810951, 'y_init': -1309.6249844779575, 'z_init': 4000.0, 'n_x': 75, 'n_y': 126, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Tue Apr 18 19:15:03 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170418STD_L01.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[101.894844 102.122215 102.37582  ... 108.37459  114.91064  124.42043 ]
 [101.98463  101.9201   102.075134 ... 120.49213  121.04182  108.73399 ]
 [101.70301  101.74385  101.854485 ... 110.620674 124.91049  144.94037 ]
 ...
 [ 84.49788   84.428     84.40194  ...  80.52712   92.84899   99.83099 ]
 [ 82.908844  82.956955  82.99871  ...  93.9605    84.04683   68.19029 ]
 [ 81.299225  81.46436   81.68425  ...  80.492065  87.23297  106.00833 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170418STD_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 525.8750160187476, 'y_init': 3944.6250122934575, 'z_init': 4000.0, 'n_x': 130, 'n_y': 100, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Tue Apr 18 21:34:15 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170418STD_L03.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[104.16222  104.399055 104.81766  ... 129.78352  125.29896  127.66168 ]
 [104.1845   104.35017  104.59667  ... 100.570946 112.29748  104.5784  ]
 [104.23213  104.278755 104.478195 ... 101.34886  120.87445  120.509926]
 ...
 [106.33076  106.470024 106.73167  ... 109.113014 120.45274  127.805046]
 [106.257256 106.27672  106.47709  ... 104.11241  110.68616  145.02014 ]
 [106.32239  106.3202   106.47685  ... 103.12103  103.059875 116.71471 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170419STD_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 5415.500024338563, 'y_init': 2364.500015397866, 'z_init': 4000.0, 'n_x': 197, 'n_y': 125, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Wed Apr 19 15:29:38 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170419STD_L02.fsm'} 
], 'EPA': [Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[102.162506 102.20768  102.33103  ... 103.3998   133.57564  132.78831 ]
 [101.934006 101.820946 101.81872  ...  89.18643   82.32647   98.505684]
 [102.250984 101.996895 101.75608  ...  95.217316  99.56977   98.689926]
 ...
 [ 59.419476  59.40246   59.45253  ...  67.783035  73.247665  72.340904]
 [ 59.291172  59.275265  59.470284 ...  59.782402  66.921646  74.762665]
 [ 59.350204  59.409885  59.61621  ...  67.860054  71.12531   94.13546 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170419EPA_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 2986.250023593504, 'y_init': 96.00001490116061, 'z_init': 4000.0, 'n_x': 191, 'n_y': 121, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Wed Apr 19 11:01:00 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170419EPA_L03.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[105.09475  105.04887  105.366974 ... 107.11649  102.99424   91.08595 ]
 [104.76829  104.94343  105.12854  ... 114.303444 117.39385  105.55191 ]
 [103.317024 103.37587  103.6764   ... 103.2221   104.533195 105.97731 ]
 ...
 [103.61411  103.58757  103.67997  ...  78.18494   86.83749   94.49371 ]
 [102.8843   102.737976 102.95702  ... 123.78226  127.77606  128.58452 ]
 [102.098305 102.12563  102.436935 ...  90.693794  89.5122    83.86093 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170420EPA_L01', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 3209.000012914339, 'y_init': 1993.875025952855, 'z_init': 4000.0, 'n_x': 105, 'n_y': 210, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Thu Apr 20 17:33:35 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170420EPA_L01.fsm'} 
, Spectrum: 
wavelength:
 [4000. 3998. 3996. ...  754.  752.  750.] 
amplitudes: 
 [[ 95.9081    95.937256  95.97436  ... 103.952126 123.30863  149.24287 ]
 [ 94.429726  94.5522    94.72692  ...  78.5463    83.35162  103.35833 ]
 [ 95.19039   94.98259   95.091064 ... 112.38364   96.644714  93.23278 ]
 ...
 [105.68242  105.791405 106.04749  ...  91.40277  110.49822  134.17737 ]
 [105.587585 105.592026 105.73054  ...  98.074196  93.36444  100.17381 ]
 [105.33544  105.62715  105.80969  ...  96.32086  100.53766  112.24974 ]] 
metadata: 
 {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170421EPA_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 3381.1250167638054, 'y_init': -1059.9999880790715, 'z_init': 4000.0, 'n_x': 136, 'n_y': 97, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Fri Apr 21 09:55:23 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170421EPA_L02.fsm'} 
]}
In [13]:
metadata = []

for f in filenames:
    name = path.split(f)[1]
    date, slide = name.split('_')
    date, medium = date[:8], date[8:]
    slide = slide[:3]
    metadata.append({'filename': name,
                     'date': date,
                     'medium': medium,
                     'slide': slide})
In [14]:
metadata = pd.DataFrame(metadata)
print(metadata)
print(metadata['medium'].value_counts())
        date             filename medium slide
0   20170418  20170418STD_L01.fsm    STD   L01
1   20170418  20170418STD_L03.fsm    STD   L03
2   20170419  20170419EPA_L03.fsm    EPA   L03
3   20170419  20170419STD_L02.fsm    STD   L02
4   20170420  20170420EDL_L01.fsm    EDL   L01
5   20170420  20170420EPA_L01.fsm    EPA   L01
6   20170420  20170420LDL_L01.fsm    LDL   L01
7   20170421  20170421EDL_L02.fsm    EDL   L02
8   20170421  20170421EPA_L02.fsm    EPA   L02
9   20170421  20170421LDL_L02.fsm    LDL   L02
10  20170422  20170422LDL_L03.fsm    LDL   L03
11  20170424  20170424EDL_L03.fsm    EDL   L03
EPA    3
STD    3
EDL    3
LDL    3
Name: medium, dtype: int64
In [15]:
spectrum = np.array([sp for key, sp in list_spec.items()])
print(type(spectrum[0]))
spectrumAmp=np.array([spAm.amplitudes for spAm in spectrum])
df_spectrum = pd.DataFrame(spectrum)
for s in spectrum:
    print(len(s.amplitudes))
print(spectrumAmp.shape)
<class 'specio.core.util.Spectrum'>
9450
13000
23111
24625
19089
22050
22960
14345
13192
25418
30422
23188
(12,)
In [16]:
# OLD VERSION

# spectrum = np.array([sp for key, sp in list_spec.items()])
# print(type(spectrum[0]))
# spectrumAmp=np.array([spAm.amplitudes for spAm in spectrum])
# df_spectrum = pd.DataFrame(spectrum)
# for s in spectrum:
#     print(len(s.amplitudes))
# print(spectrumAmp.shape)
In [17]:
spectrum = np.zeros((0,1626))
for i in range(4):
     spectrum=np.concatenate( ( spectrum, spectrumAmp[i]) ) 
In [18]:
#type(list_spec)
#list_spec[next(iter(list_spec))]
In [19]:
df_spectrum = pd.DataFrame(spectrum, columns=list_spec[next(iter(list_spec))].wavelength)
# df_spectrum = pd.DataFrame(spectrum, columns=list_spec[0].wavelength)
In [20]:
df_spectrum = pd.concat([metadata[['date','filename', 'medium','slide']],
                         df_spectrum], axis='columns')
In [21]:
df = df_spectrum.set_index(['date','filename', 'medium','slide'])
print(df.head)
<bound method NDFrame.head of                                                4000.0      3998.0      3996.0  \
date     filename            medium slide                                       
20170418 20170418STD_L01.fsm STD    L01    100.565628  100.437462  100.655556   
         20170418STD_L03.fsm STD    L03    100.524345  100.473595  100.567368   
20170419 20170419EPA_L03.fsm EPA    L03    100.919174  100.965538  101.068916   
         20170419STD_L02.fsm STD    L02    100.857765  100.687714  100.738014   
20170420 20170420EDL_L01.fsm EDL    L01    100.038307  100.001404  100.043625   
         20170420EPA_L01.fsm EPA    L01    100.443130  100.286613  100.237091   
         20170420LDL_L01.fsm LDL    L01     99.781319   99.782127   99.919327   
20170421 20170421EDL_L02.fsm EDL    L02     99.920059   99.922890  100.015640   
         20170421EPA_L02.fsm EPA    L02     98.867813   98.909019   98.822746   
         20170421LDL_L02.fsm LDL    L02     99.324081   99.301086   99.487846   
20170422 20170422LDL_L03.fsm LDL    L03     97.648727   97.473358   97.625854   
20170424 20170424EDL_L03.fsm EDL    L03     97.929924   98.087311   98.401871   
NaN      NaN                 NaN    NaN    100.707558  100.897781  101.146111   
                                    NaN    100.523865  100.686882  101.014961   
                                    NaN    101.409744  101.463905  101.606628   
                                    NaN    101.298958  101.167229  101.249306   
                                    NaN    100.134201  100.167328  100.477051   
                                    NaN    101.181587  101.150063  101.210358   
                                    NaN    101.320335  101.342384  101.352371   
                                    NaN    101.108292  101.222267  101.403160   
                                    NaN     98.958916   98.936974   99.172264   
                                    NaN     96.649536   96.657967   96.780930   
                                    NaN     98.037086   97.986336   98.078285   
                                    NaN     98.633995   98.708939   98.927238   
                                    NaN    101.184662  101.305420  101.402985   
                                    NaN    102.573097  102.672607  102.744850   
                                    NaN    103.063278  103.227814  103.421783   
                                    NaN    103.116150  103.084473  103.313644   
                                    NaN    102.316795  102.298477  102.427322   
                                    NaN    100.783340  100.682831  100.949509   
...                                               ...         ...         ...   
                                    NaN    100.777260  100.811867  100.914032   
                                    NaN    102.247948  102.313698  102.731323   
                                    NaN    102.806313  102.853012  103.050240   
                                    NaN    102.898071  102.975212  103.350914   
                                    NaN    102.622520  102.641243  102.992310   
                                    NaN    102.800278  102.625954  102.720184   
                                    NaN    102.998772  103.173286  103.295296   
                                    NaN    102.566078  102.453514  102.457191   
                                    NaN    101.370506  101.531540  101.880219   
                                    NaN    100.370956  100.366791  100.437279   
                                    NaN    102.368912  102.361794  102.657280   
                                    NaN    103.767342  103.766953  104.029892   
                                    NaN    102.990990  102.826973  102.979378   
                                    NaN    102.646469  102.669449  102.939774   
                                    NaN    102.359398  102.274765  102.665016   
                                    NaN    103.968262  104.009453  104.161545   
                                    NaN    104.205429  104.196121  104.477318   
                                    NaN    103.134033  103.315300  103.592659   
                                    NaN    101.543167  101.688293  101.917000   
                                    NaN    102.990639  102.970367  102.994751   
                                    NaN    104.538132  104.676315  105.022194   
                                    NaN    104.057968  104.072235  104.313950   
                                    NaN    102.132980  102.182739  102.388458   
                                    NaN    103.876236  103.878395  104.080772   
                                    NaN    105.723640  105.937202  106.119095   
                                    NaN    105.846237  105.953285  106.168076   
                                    NaN    106.057655  106.088699  106.293068   
                                    NaN    106.330757  106.470024  106.731667   
                                    NaN    106.257256  106.276718  106.477089   
                                    NaN    106.322388  106.320198  106.476852   

                                               3994.0      3992.0      3990.0  \
date     filename            medium slide                                       
20170418 20170418STD_L01.fsm STD    L01    101.042213  101.127029  101.041862   
         20170418STD_L03.fsm STD    L03    100.859825  101.011810  100.881363   
20170419 20170419EPA_L03.fsm EPA    L03    101.187706  101.240456  101.315536   
         20170419STD_L02.fsm STD    L02    100.876060  101.003120  101.168861   
20170420 20170420EDL_L01.fsm EDL    L01    100.264618  100.468994  100.374901   
         20170420EPA_L01.fsm EPA    L01    100.350197  100.373093  100.387566   
         20170420LDL_L01.fsm LDL    L01    100.200516  100.322189  100.209435   
20170421 20170421EDL_L02.fsm EDL    L02     99.979538   99.937737  100.053764   
         20170421EPA_L02.fsm EPA    L02     98.903679   99.061241   99.115631   
         20170421LDL_L02.fsm LDL    L02     99.790405  100.009796  100.049370   
20170422 20170422LDL_L03.fsm LDL    L03     98.008072   98.248367   98.330795   
20170424 20170424EDL_L03.fsm EDL    L03     98.482269   98.331657   98.288475   
NaN      NaN                 NaN    NaN    101.172783  101.122574  101.068527   
                                    NaN    101.265068  101.418175  101.441475   
                                    NaN    101.688332  101.782608  101.840454   
                                    NaN    101.422386  101.529556  101.549568   
                                    NaN    100.653099  100.691956  100.718109   
                                    NaN    101.335983  101.351372  101.378624   
                                    NaN    101.296646  101.226707  101.358971   
                                    NaN    101.438881  101.384811  101.409660   
                                    NaN     99.350510   99.318413   99.282066   
                                    NaN     96.880531   96.841644   96.969887   
                                    NaN     98.170647   98.209190   98.266426   
                                    NaN     98.998413   98.803551   98.630127   
                                    NaN    101.494896  101.456848  101.366470   
                                    NaN    102.736801  102.739510  102.837883   
                                    NaN    103.618141  103.686699  103.570320   
                                    NaN    103.545265  103.581879  103.541756   
                                    NaN    102.606453  102.640564  102.613075   
                                    NaN    101.157631  101.114922  101.183067   
...                                               ...         ...         ...   
                                    NaN    101.101219  101.146675  101.119804   
                                    NaN    103.064972  103.087975  103.013954   
                                    NaN    103.156837  103.198341  103.283920   
                                    NaN    103.603951  103.672760  103.898476   
                                    NaN    103.319633  103.326538  103.366898   
                                    NaN    102.986526  103.020821  102.987846   
                                    NaN    103.319916  103.391968  103.650070   
                                    NaN    102.541862  102.734886  102.966606   
                                    NaN    102.053375  102.041237  102.092140   
                                    NaN    100.614357  100.803871  100.943771   
                                    NaN    103.046471  103.248253  103.228333   
                                    NaN    104.169853  104.152046  104.284393   
                                    NaN    103.177605  103.156570  103.172562   
                                    NaN    103.141785  103.073181  103.019325   
                                    NaN    103.146568  103.430466  103.598946   
                                    NaN    104.353561  104.509460  104.497269   
                                    NaN    104.772896  104.916801  104.975418   
                                    NaN    103.792297  103.807617  103.782814   
                                    NaN    102.016884  102.050018  102.049782   
                                    NaN    103.211342  103.394142  103.503998   
                                    NaN    105.421814  105.566093  105.452629   
                                    NaN    104.513428  104.505569  104.538742   
                                    NaN    102.558014  102.557755  102.636276   
                                    NaN    104.273811  104.315712  104.451172   
                                    NaN    106.187050  106.149406  106.050110   
                                    NaN    106.332436  106.404892  106.552223   
                                    NaN    106.691406  107.103096  107.253860   
                                    NaN    106.943954  107.062592  107.098076   
                                    NaN    106.683800  106.905350  107.017441   
                                    NaN    106.709534  106.852783  106.942635   

                                               3988.0      3986.0      3984.0  \
date     filename            medium slide                                       
20170418 20170418STD_L01.fsm STD    L01    100.910751  100.865280  100.962021   
         20170418STD_L03.fsm STD    L03    100.574974  100.511772  100.669250   
20170419 20170419EPA_L03.fsm EPA    L03    101.130440  100.767326  100.745483   
         20170419STD_L02.fsm STD    L02    101.041557  100.821655  100.896675   
20170420 20170420EDL_L01.fsm EDL    L01    100.055763   99.954872  100.125397   
         20170420EPA_L01.fsm EPA    L01    100.381561  100.376938  100.457397   
         20170420LDL_L01.fsm LDL    L01    100.021652   99.910301   99.854286   
20170421 20170421EDL_L02.fsm EDL    L02    100.077660   99.982674   99.839264   
         20170421EPA_L02.fsm EPA    L02     98.796257   98.382439   98.376617   
         20170421LDL_L02.fsm LDL    L02     99.777679   99.586403   99.624626   
20170422 20170422LDL_L03.fsm LDL    L03     98.171387   97.917404   97.820259   
20170424 20170424EDL_L03.fsm EDL    L03     98.131828   97.962547   98.051163   
NaN      NaN                 NaN    NaN    100.795059  100.564850  100.699104   
                                    NaN    101.115059  100.809456  100.893379   
                                    NaN    101.699097  101.661537  101.830284   
                                    NaN    101.390434  101.261772  101.378426   
                                    NaN    100.511581  100.294182  100.397125   
                                    NaN    101.270973  101.156845  101.256187   
                                    NaN    101.380615  101.311760  101.436279   
                                    NaN    101.434792  101.451614  101.458961   
                                    NaN     99.134193   99.009178   99.103783   
                                    NaN     97.063652   96.950272   96.899689   
                                    NaN     98.125877   98.046371   98.251450   
                                    NaN     98.455757   98.321823   98.540588   
                                    NaN    101.267822  101.237198  101.302116   
                                    NaN    102.739700  102.484962  102.409019   
                                    NaN    103.238609  103.078979  103.230988   
                                    NaN    103.394341  103.272560  103.316582   
                                    NaN    102.413216  102.069626  102.015404   
                                    NaN    101.117531  100.980415  101.089264   
...                                               ...         ...         ...   
                                    NaN    100.978111  100.748283  100.718498   
                                    NaN    102.831612  102.727715  102.932297   
                                    NaN    103.087715  102.788193  102.853317   
                                    NaN    103.860626  103.579712  103.549484   
                                    NaN    103.366478  103.199524  103.117081   
                                    NaN    102.901306  102.910378  103.046501   
                                    NaN    103.538994  103.199615  103.242645   
                                    NaN    102.842705  102.528267  102.578934   
                                    NaN    101.882034  101.550720  101.521309   
                                    NaN    100.809067  100.624687  100.647217   
                                    NaN    102.857292  102.508919  102.616096   
                                    NaN    104.121857  103.679535  103.665115   
                                    NaN    103.113266  103.075020  103.266174   
                                    NaN    102.884338  102.757912  102.936836   
                                    NaN    103.413574  103.101234  103.070282   
                                    NaN    104.206413  104.012299  104.179771   
                                    NaN    104.719467  104.321556  104.263184   
                                    NaN    103.485748  103.117172  103.178848   
                                    NaN    101.703392  101.331001  101.460556   
                                    NaN    103.404640  103.125984  103.024109   
                                    NaN    105.042053  104.697525  104.769348   
                                    NaN    104.496307  104.325920  104.499146   
                                    NaN    102.570503  102.329849  102.377327   
                                    NaN    104.364967  104.014046  104.025848   
                                    NaN    105.791168  105.635605  105.882988   
                                    NaN    106.526405  106.292870  106.261147   
                                    NaN    106.931076  106.535164  106.412582   
                                    NaN    106.924377  106.762016  106.858765   
                                    NaN    106.706062  106.422119  106.552071   
                                    NaN    106.916428  106.779396  106.810669   

                                               3982.0     ...          768.0   \
date     filename            medium slide                 ...                   
20170418 20170418STD_L01.fsm STD    L01    101.082581     ...       97.847168   
         20170418STD_L03.fsm STD    L03    100.680412     ...       88.620834   
20170419 20170419EPA_L03.fsm EPA    L03    100.949631     ...       97.180229   
         20170419STD_L02.fsm STD    L02    100.990318     ...       97.403694   
20170420 20170420EDL_L01.fsm EDL    L01    100.189461     ...       91.867386   
         20170420EPA_L01.fsm EPA    L01    100.403809     ...       91.011871   
         20170420LDL_L01.fsm LDL    L01     99.849792     ...       92.671143   
20170421 20170421EDL_L02.fsm EDL    L02     99.756645     ...       89.964493   
         20170421EPA_L02.fsm EPA    L02     98.610786     ...      104.043015   
         20170421LDL_L02.fsm LDL    L02     99.642006     ...       95.606041   
20170422 20170422LDL_L03.fsm LDL    L03     97.902710     ...       92.277618   
20170424 20170424EDL_L03.fsm EDL    L03     98.105408     ...       96.227180   
NaN      NaN                 NaN    NaN    100.895401     ...      108.243217   
                                    NaN    101.052010     ...       93.163918   
                                    NaN    101.912880     ...      104.118797   
                                    NaN    101.407669     ...       87.206390   
                                    NaN    100.556946     ...       94.742813   
                                    NaN    101.448196     ...       90.703300   
                                    NaN    101.612427     ...       96.453201   
                                    NaN    101.401611     ...       91.815041   
                                    NaN     99.291458     ...       92.194069   
                                    NaN     96.914688     ...       82.858711   
                                    NaN     98.296463     ...       94.943924   
                                    NaN     98.793625     ...       87.688866   
                                    NaN    101.376289     ...      101.239548   
                                    NaN    102.508377     ...       86.944206   
                                    NaN    103.374344     ...       99.760765   
                                    NaN    103.286270     ...       89.517639   
                                    NaN    102.244118     ...       96.053947   
                                    NaN    101.146576     ...       88.004372   
...                                               ...     ...             ...   
                                    NaN    100.902367     ...       87.174370   
                                    NaN    103.077293     ...      100.824036   
                                    NaN    103.035210     ...       90.893433   
                                    NaN    103.518913     ...       92.329071   
                                    NaN    103.154640     ...       89.028191   
                                    NaN    103.086075     ...       98.479439   
                                    NaN    103.512543     ...       82.885292   
                                    NaN    102.883621     ...       92.459297   
                                    NaN    101.629356     ...       81.153015   
                                    NaN    100.730591     ...       92.652802   
                                    NaN    102.881889     ...       87.379326   
                                    NaN    103.981987     ...       95.537773   
                                    NaN    103.372910     ...       87.237984   
                                    NaN    103.144638     ...       83.719475   
                                    NaN    103.168159     ...       93.469635   
                                    NaN    104.433418     ...       93.624466   
                                    NaN    104.366058     ...       87.932289   
                                    NaN    103.468414     ...       94.257248   
                                    NaN    101.780052     ...       85.009865   
                                    NaN    103.172470     ...       77.939697   
                                    NaN    104.979858     ...       81.837387   
                                    NaN    104.841652     ...       94.542046   
                                    NaN    102.612976     ...       99.870026   
                                    NaN    104.471138     ...       93.032730   
                                    NaN    106.185516     ...       84.111847   
                                    NaN    106.413635     ...       94.299927   
                                    NaN    106.549843     ...       79.366203   
                                    NaN    106.876579     ...       94.780693   
                                    NaN    106.708603     ...       86.119209   
                                    NaN    106.930885     ...       89.656952   

                                               766.0       764.0       762.0   \
date     filename            medium slide                                       
20170418 20170418STD_L01.fsm STD    L01     99.934502   98.916954   97.683571   
         20170418STD_L03.fsm STD    L03     88.410263   88.908142   88.539413   
20170419 20170419EPA_L03.fsm EPA    L03     97.375801  102.663254  114.905739   
         20170419STD_L02.fsm STD    L02     91.576401   89.823914   91.289764   
20170420 20170420EDL_L01.fsm EDL    L01     87.589760   87.946770   98.988518   
         20170420EPA_L01.fsm EPA    L01     92.567955   86.593269   83.686249   
         20170420LDL_L01.fsm LDL    L01     97.254547  100.758766  100.427223   
20170421 20170421EDL_L02.fsm EDL    L02     90.353584   97.386131  101.835999   
         20170421EPA_L02.fsm EPA    L02    105.049744  101.846985   96.491173   
         20170421LDL_L02.fsm LDL    L02     87.655197   84.711105   86.612267   
20170422 20170422LDL_L03.fsm LDL    L03     89.417854   90.469185   94.064812   
20170424 20170424EDL_L03.fsm EDL    L03     94.055908   97.769157  100.939896   
NaN      NaN                 NaN    NaN    103.757820   98.143608  102.016747   
                                    NaN     93.197144   89.236572   85.776962   
                                    NaN    104.342163  112.134460  119.816986   
                                    NaN     87.721451   88.294357   91.709450   
                                    NaN    100.480103  101.994179  107.915695   
                                    NaN     92.288826   91.804420   84.179291   
                                    NaN     92.578758   91.326553   91.119362   
                                    NaN     95.910400   99.541359   94.143921   
                                    NaN     96.189926  101.902031  104.882935   
                                    NaN     86.562057   89.648300   91.527390   
                                    NaN     94.446526   96.256729   95.144218   
                                    NaN     83.453026   83.650505   87.118317   
                                    NaN    102.612473  100.282814   96.952637   
                                    NaN     89.071732   93.803757   99.715996   
                                    NaN     99.210052  104.921722  105.551941   
                                    NaN     89.289810   88.998016   91.661736   
                                    NaN    103.091301  117.383934  121.030472   
                                    NaN     86.677589   90.121895   98.115494   
...                                               ...         ...         ...   
                                    NaN     91.290176   95.661148   94.256355   
                                    NaN     96.268364   92.197662   97.155876   
                                    NaN     92.403267   94.108315   88.985130   
                                    NaN     92.321854   95.362221   96.325684   
                                    NaN     95.365204   95.285606   95.824982   
                                    NaN     96.746750  100.504051   98.223961   
                                    NaN     79.937317   80.184105   82.810097   
                                    NaN     84.320915   83.680252   92.836510   
                                    NaN     81.095695   82.739487   85.033768   
                                    NaN     97.657761   96.038300   91.474518   
                                    NaN     85.903908   79.372910   78.047264   
                                    NaN     98.187317   92.996117   85.032242   
                                    NaN     97.028976  102.408615   98.007240   
                                    NaN     89.724762   95.743675   97.689003   
                                    NaN     92.720810   87.362144   88.195557   
                                    NaN     96.190445  100.837036   98.078812   
                                    NaN     86.791588   90.860252   87.718681   
                                    NaN     87.585037   79.162239   79.784500   
                                    NaN     84.847832   86.460388   91.257790   
                                    NaN     79.889091   84.901070   95.860443   
                                    NaN     81.122726   83.827202   84.746201   
                                    NaN     88.900993   91.196976   96.487717   
                                    NaN    100.818489   94.008492   92.473572   
                                    NaN     93.969879   86.749847   71.764000   
                                    NaN     82.870277   83.061073   85.660034   
                                    NaN     84.158897   82.856895   87.493027   
                                    NaN     84.372948   89.287621   91.708710   
                                    NaN    106.678513  111.632858  100.797249   
                                    NaN     78.669502   78.765411   83.522842   
                                    NaN     96.044250  103.532494  101.821732   

                                               760.0       758.0       756.0   \
date     filename            medium slide                                       
20170418 20170418STD_L01.fsm STD    L01     92.842155   96.027298   96.088387   
         20170418STD_L03.fsm STD    L03     86.191490   84.405281   87.334167   
20170419 20170419EPA_L03.fsm EPA    L03    118.503342  116.997360  119.955429   
         20170419STD_L02.fsm STD    L02     88.808784   95.976746  111.838791   
20170420 20170420EDL_L01.fsm EDL    L01    107.015526  107.126251  110.778641   
         20170420EPA_L01.fsm EPA    L01     85.868645   97.246002  110.951599   
         20170420LDL_L01.fsm LDL    L01     96.501488   95.518997   95.328941   
20170421 20170421EDL_L02.fsm EDL    L02     95.810768   91.124649   94.859505   
         20170421EPA_L02.fsm EPA    L02     94.230507  103.275208  109.447952   
         20170421LDL_L02.fsm LDL    L02     80.581963   82.191971   89.463554   
20170422 20170422LDL_L03.fsm LDL    L03     95.721458   89.059097   89.150780   
20170424 20170424EDL_L03.fsm EDL    L03     94.171326   84.503754   87.896156   
NaN      NaN                 NaN    NaN    110.541435  112.555496  112.099350   
                                    NaN     86.177628   86.120354   80.234840   
                                    NaN    122.106201  128.222351  130.554031   
                                    NaN     96.725685   95.878967  106.070190   
                                    NaN    109.565353  100.807991   98.328598   
                                    NaN     78.364723   81.415543   83.699532   
                                    NaN     85.425476   82.000771   88.244247   
                                    NaN     88.442635   89.258858   89.521736   
                                    NaN    102.331520  104.790810  105.319313   
                                    NaN     95.512703   96.776672   92.712936   
                                    NaN     96.771492  102.043358  105.163612   
                                    NaN     92.217873   93.079643   94.566597   
                                    NaN    101.188896  102.033966   94.247391   
                                    NaN    102.025940  100.503807  100.743172   
                                    NaN     96.492523   96.473763  110.565521   
                                    NaN     97.152466  101.936752  103.548485   
                                    NaN    110.498100  100.448959   96.733490   
                                    NaN    106.968147   99.374992   85.719589   
...                                               ...         ...         ...   
                                    NaN     88.306602   86.367424   84.838478   
                                    NaN    105.982300  109.171600  101.042694   
                                    NaN     83.297440   84.626945   87.911278   
                                    NaN     97.015106   94.567345   86.227242   
                                    NaN    103.954407  103.351685   96.190948   
                                    NaN     91.974136  100.171455  111.190247   
                                    NaN     82.426643   85.251747   84.637634   
                                    NaN    102.717896  107.364151  111.667023   
                                    NaN     83.235626   86.032768   97.710838   
                                    NaN    100.250175  114.536087  114.504799   
                                    NaN     87.425621   95.255112   93.280022   
                                    NaN     84.818810   91.358185  104.639030   
                                    NaN    103.403496  110.116470   97.084328   
                                    NaN     93.761063   95.600655  101.437691   
                                    NaN     94.011917   94.566177   87.154015   
                                    NaN     96.323486  102.080765  103.145172   
                                    NaN     86.979218   92.121437   93.472984   
                                    NaN     95.074821  106.094154  108.195168   
                                    NaN     88.581253   80.782463   91.900406   
                                    NaN    101.654381  102.141472  109.681610   
                                    NaN     88.912285   89.839912   99.810211   
                                    NaN     98.597908   97.906975  103.217842   
                                    NaN    103.089981  103.564880   99.391121   
                                    NaN     71.467560   85.254601   97.048798   
                                    NaN     85.424843   77.223602   79.307907   
                                    NaN     91.339355  101.145294  102.862724   
                                    NaN     91.835510   85.769707   83.440140   
                                    NaN     87.645233   91.677406  101.393486   
                                    NaN     94.490059   97.960152   99.420647   
                                    NaN     98.119583  106.574509  114.233269   

                                               754.0       752.0       750.0   
date     filename            medium slide                                      
20170418 20170418STD_L01.fsm STD    L01    113.870193  130.672668  130.120468  
         20170418STD_L03.fsm STD    L03    106.732666  122.840134  131.918289  
20170419 20170419EPA_L03.fsm EPA    L03    117.111893  118.597321  112.632675  
         20170419STD_L02.fsm STD    L02    112.974892   95.770256   90.075630  
20170420 20170420EDL_L01.fsm EDL    L01    111.859688  124.256477  139.328354  
         20170420EPA_L01.fsm EPA    L01    106.214729   95.746674  102.821426  
         20170420LDL_L01.fsm LDL    L01    105.457169  120.860954  134.041229  
20170421 20170421EDL_L02.fsm EDL    L02     91.000809   98.432159  115.355316  
         20170421EPA_L02.fsm EPA    L02    104.784416   98.080559  112.955231  
         20170421LDL_L02.fsm LDL    L02     98.798721  114.556267  129.300308  
20170422 20170422LDL_L03.fsm LDL    L03     96.951324  104.087776  129.143875  
20170424 20170424EDL_L03.fsm EDL    L03     93.599846   90.318748  102.521927  
NaN      NaN                 NaN    NaN    120.949921  134.045258  134.507690  
                                    NaN     77.837006   85.999451   96.042374  
                                    NaN    126.487251  122.507408  125.694550  
                                    NaN    114.774338  110.014786  108.176598  
                                    NaN    116.974342  136.313232  142.106888  
                                    NaN     86.489349   89.476707  110.592361  
                                    NaN     98.255325  115.094772  112.032875  
                                    NaN     89.980728   95.863831  133.690338  
                                    NaN     92.559456   93.372200  111.725449  
                                    NaN     89.670258  104.136192  129.500397  
                                    NaN    102.873634   88.581467   93.870941  
                                    NaN     99.420502   95.210159  109.786858  
                                    NaN     86.744728   92.390434  110.039902  
                                    NaN     96.428345   91.777588  102.133469  
                                    NaN    115.881119  111.549980  124.439857  
                                    NaN    108.461761  114.816826  103.633095  
                                    NaN    107.314552  127.992340  144.923645  
                                    NaN     87.516823   83.144257   79.988937  
...                                               ...         ...         ...  
                                    NaN     80.131302   91.170433  110.405586  
                                    NaN    102.089340  103.251915  100.954506  
                                    NaN    100.101814  106.612007  115.218018  
                                    NaN     91.454987  109.662926  130.148880  
                                    NaN     87.868607   86.699188  100.751404  
                                    NaN    111.961784  118.116463  139.369934  
                                    NaN     88.090385   96.863525  116.877663  
                                    NaN    113.579231  118.856903  126.939423  
                                    NaN    103.149506   97.596558   92.920403  
                                    NaN    110.349564  114.752075  126.208321  
                                    NaN    102.702995  114.197426  110.894417  
                                    NaN    108.639786  108.381905  120.923454  
                                    NaN     91.397369   94.453857   99.098465  
                                    NaN    104.611053  121.836426  144.519989  
                                    NaN     83.709595   84.987785   98.436295  
                                    NaN    109.984978  107.802322  108.511925  
                                    NaN     80.918518   69.891281   87.796272  
                                    NaN    106.367905  109.786850  101.845345  
                                    NaN    101.259224  106.773705  124.803596  
                                    NaN    117.546150  120.042358  127.797256  
                                    NaN    101.262184   97.645584  110.235054  
                                    NaN    116.192940  124.565170  137.355240  
                                    NaN     93.998566   83.536140   85.828781  
                                    NaN     98.797340  100.294891  110.495911  
                                    NaN     97.077164  110.824722  118.402489  
                                    NaN    110.327568  126.226738  148.152344  
                                    NaN     98.902176  105.689407   98.046310  
                                    NaN    109.113014  120.452744  127.805046  
                                    NaN    104.112411  110.686157  145.020142  
                                    NaN    103.121033  103.059875  116.714706  

[70186 rows x 1626 columns]>
In [22]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(df.groupby('medium').mean().T)
plt.show()
In [23]:
pca = PCA(n_components=6)
In [24]:
pca.fit(spectrum)
Out[24]:
PCA(copy=True, iterated_power='auto', n_components=6, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
In [25]:
wavelength=list_spec[next(iter(list_spec))].wavelength
#wavelength=list_spec[0].wavelength
plt.plot(wavelength,pca.components_[:6].T)
plt.show()
In [26]:
plt.plot(pca.explained_variance_ratio_[0:5])
plt.show()
In [27]:
print(pca.explained_variance_ratio_[0:5])
[0.71655348 0.2016908  0.02776846 0.02209929 0.01167137]
In [28]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
In [29]:
pca_norm = make_pipeline(StandardScaler(), PCA(n_components=6, whiten=True))
pca = PCA(n_components=6, whiten=True)
X_r = pca.fit_transform(spectrum)
X_r = pca_norm.fit_transform(spectrum)
In [30]:
print(X_r.shape)
(70186, 6)
In [31]:
plt.plot(wavelength,pca.components_[0].T)
plt.axvspan(1000, 1120, alpha=0.2, color='grey')
plt.show()
In [32]:
plt.plot(wavelength,pca.components_[1].T)
plt.axvspan(750, 898, alpha=0.2, color='grey')
plt.show()
In [33]:
plt.plot(wavelength,pca.components_[2].T)
plt.axvspan(750, 898, alpha=0.2, color='grey')
plt.show()
In [34]:
plt.plot(wavelength,pca.components_[3].T)
plt.axvspan(750, 898, alpha=0.2, color='grey')
plt.show()
In [35]:
plt.plot(wavelength,pca.components_[4].T)
plt.axvspan(2400, 2450, alpha=0.2, color='grey')
plt.show()
In [36]:
plt.plot(wavelength,pca.components_[5].T)
plt.axvspan(1000, 1200, alpha=0.2, color='grey')
plt.show()
In [37]:
for medium in metadata['medium'].unique() :
    mask = metadata['medium'] == medium
    plt.scatter(X_r[mask, 0], X_r[mask, 1],
                label=medium, s=5, alpha=0.7)
plt.legend()
plt.show()
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-37-b85e9a59e20e> in <module>()
      1 for medium in metadata['medium'].unique() :
      2     mask = metadata['medium'] == medium
----> 3     plt.scatter(X_r[mask, 0], X_r[mask, 1],
      4                 label=medium, s=5, alpha=0.7)
      5 plt.legend()

IndexError: boolean index did not match indexed array along dimension 0; dimension is 70186 but corresponding boolean dimension is 12
In [ ]:
plt.scatter(X_r[:, 0], X_r[:, 1])
plt.show()
In [ ]:
df_spectrum