from specio import specread
from os import path
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from SPARQLWrapper import SPARQLWrapper, JSON
import re
# Query the open-data SPARQL endpoint for every picture reachable through
# hasSlide / hasPicture from the analysis resource named in the query.
sparql = SPARQLWrapper("https://opendata1.opendata.u-psud.fr/sparql/")
sparql.setQuery("""
prefix projectInvivo: <http://invivo.lipsys2.u-psud.fr/wiki/Data:Analyse_des_macrophages#>
prefix daapp: <http://daap.eu/wiki/Data:Project#>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix vcard: <http://www.w3.org/2006/vcard/ns#>
select DISTINCT ?picture
where {
<http://daap.eu/wiki/Analyse_des_macrophages_Méthode_2_Campagne_2_:_2017>
projectInvivo:hasSlide ?slide .
?slide projectInvivo:hasPicture ?picture .
}
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# One URI per result binding, as stored in the triple store.
filesNAS = [result["picture"]["value"] for result in results["results"]["bindings"]]

# Translate each file URI into the path where the same tree is available
# locally.  This is a plain literal prefix swap: the previous re.sub() call
# used a non-raw pattern containing "\$" (an invalid string escape that warns
# on recent Python versions) and unescaped "." metacharacters.
NAS_PREFIX = 'file://filer.ups.u-psud.fr/archivagesciences$/Chimie%20Analytique/projets/Macrophages_imagerie'
LOCAL_PREFIX = '/home/jupyter/Notebooks/htdocs/Macrophages_imagerie'
filenames = sorted(uri.replace(NAS_PREFIX, LOCAL_PREFIX) for uri in filesNAS)
print("\n".join(filenames))
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L03.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419EPA_L03.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419STD_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EDL_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EPA_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420LDL_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EDL_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EPA_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421LDL_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170422LDL_L03.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170424EDL_L03.fsm
# OLD, file-based approach: list the .fsm files directly from a local directory.
# DATA_PATH = ("/home/jupyter/Notebooks/Macrophages_imagerie/Methode_02/Campagne_02")
# EXT_FILE = "*.fsm"
# #filenames = sorted(glob.glob(path.join(DATA_PATH, EXT_FILE)))
# filenames = glob.glob(path.join(DATA_PATH, EXT_FILE))

# Show the file list that is actually used, one path per line.
print(*filenames, sep="\n")
/home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L03.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419EPA_L03.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170419STD_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EDL_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420EPA_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170420LDL_L01.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EDL_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421EPA_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170421LDL_L02.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170422LDL_L03.fsm /home/jupyter/Notebooks/htdocs/Macrophages_imagerie/Methode_02/Campagne_02/20170424EDL_L03.fsm
# "/home/jupyter/Notebooks/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm" == "/home/jupyter/Notebooks/Macrophages_imagerie/Methode_02/Campagne_02/20170418STD_L01.fsm"
# i = 0
# fsort1 = sorted(filenames)
# fsort2 = sorted(filenames2)
# for f in fsort1:
# print(fsort1[i])
# print(fsort2[i])
# print(fsort1[i] == fsort2[i])
# i = i + 1
# Read every file once with specread() and keep the result keyed by its path
# (despite the name, list_spec is a dict: path -> loaded spectrum object).
list_spec = {spec_path: specread(spec_path) for spec_path in filenames}
print(len(list_spec))
12
# Medium codes used to group the files.
cell_medium = ["EDL", "LDL", "STD", "EPA"]

# One independent, initially empty list per medium code.
dict_ir = dict((medium_code, []) for medium_code in cell_medium)
print(dict_ir)
{'EDL': [], 'LDL': [], 'STD': [], 'EPA': []}
#KR
# File each already-loaded spectrum under every medium code that occurs in
# its path.  An earlier version called specread(f) again at this point;
# looking the object up in list_spec avoids reading each file a second time.
for medium_code, bucket in dict_ir.items():
    bucket.extend(
        list_spec[spec_path] for spec_path in filenames if medium_code in spec_path
    )
print(dict_ir)
{'EDL': [Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[102.41041 102.50108 102.6334 ... 117.68095 124.63407 126.097404] [102.1833 102.29499 102.5289 ... 129.85854 138.07028 127.65028 ] [102.0728 102.15745 102.36862 ... 97.370964 96.82941 95.83801 ] ... [101.20574 101.14632 101.353165 ... 96.523544 110.76741 124.48135 ] [102.002625 102.18558 102.52742 ... 107.94787 121.876335 141.33583 ] [102.98535 103.009186 103.113174 ... 98.65173 115.48307 134.8786 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170420EDL_L01', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 1109.5000233451517, 'y_init': -137.49998758236615, 'z_init': 4000.0, 'n_x': 189, 'n_y': 101, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Thu Apr 20 08:46:14 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170420EDL_L01.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[ 99.4711 99.47538 99.705635 ... 80.75171 87.11026 91.1909 ] [101.53501 101.57827 101.71788 ... 128.05081 143.01501 141.09897 ] [104.15017 103.99866 104.21387 ... 115.34351 109.42525 102.67608 ] ... [ 0. 0. 0. ... 0. 0. 0. ] [ 0. 0. 0. ... 0. 0. 0. ] [ 0. 0. 0. ... 0. 0. 0. 
]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170421EDL_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 1308.2500186264508, 'y_init': 58.25001167257584, 'z_init': 4000.0, 'n_x': 151, 'n_y': 95, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Fri Apr 21 13:01:10 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170421EDL_L02.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[ 99.18622 99.17415 98.970215 ... 89.40593 98.58076 100.90654 ] [ 99.61621 99.58943 99.66678 ... 100.6327 92.2831 86.76073 ] [100.368095 100.36098 100.24655 ... 108.87523 107.81979 100.7395 ] ... [102.59632 102.52495 102.4322 ... 109.580574 95.95844 89.17854 ] [102.71568 102.57474 102.39938 ... 134.68396 140.88217 134.755 ] [102.33903 102.52533 102.37818 ... 
102.41948 110.08465 114.507416]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170424EDL_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 4327.750023096799, 'y_init': 3063.6250152736893, 'z_init': 4000.0, 'n_x': 187, 'n_y': 124, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Mon Apr 24 10:21:44 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170424EDL_L03.fsm'} ], 'LDL': [Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[ 96.76078 96.78547 96.85925 ... 89.256004 85.60106 99.70127 ] [ 96.76285 96.898605 97.187126 ... 92.758514 110.21162 122.25818 ] [ 97.85429 97.894966 98.22846 ... 99.75266 105.9302 110.99608 ] ... [ 94.66745 94.7308 94.83478 ... 90.19859 98.231445 122.904625] [ 93.45002 93.701256 93.778564 ... 88.477104 83.776886 94.74307 ] [ 90.18729 90.25069 90.468185 ... 
97.729935 112.131355 126.26607 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170420LDL_L01', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 2738.625020240743, 'y_init': 4659.625017260511, 'z_init': 4000.0, 'n_x': 164, 'n_y': 140, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Thu Apr 20 12:54:10 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170420LDL_L01.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[ 91.264046 91.165306 91.232056 ... 98.369286 112.256485 125.01442 ] [ 92.64506 92.759186 92.896645 ... 118.65007 126.823395 137.74821 ] [ 90.88955 90.7332 90.76602 ... 106.29597 108.50493 115.7798 ] ... [ 0.35872427 0.3458393 0.36662653 ... 23.380354 31.088797 38.938744 ] [ 0.24737059 0.27054384 0.27587673 ... 19.26295 22.034353 30.822155 ] [ 0.28510052 0.30762413 0.27539393 ... 
26.240295 31.464735 36.443123 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170421LDL_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 4365.750022103389, 'y_init': -89.62498249113624, 'z_init': 4000.0, 'n_x': 179, 'n_y': 142, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Fri Apr 21 17:24:12 2017', 'image_name': '', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170421LDL_L02.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[102.06887 102.015755 101.84997 ... 101.92222 118.8075 135.4712 ] [102.23148 101.99641 101.785484 ... 118.73554 108.62514 92.96961 ] [102.371765 102.30904 102.116 ... 125.76474 135.4335 150.37312 ] ... [ 51.392715 51.459137 51.29502 ... 63.80772 68.654015 72.258995] [ 53.856514 53.817226 53.74031 ... 69.5356 63.362663 59.94399 ] [ 50.831997 50.748993 50.635654 ... 
68.99248 70.20383 73.61084 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170422LDL_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 7282.250035514433, 'y_init': 327.87501303851553, 'z_init': 4000.0, 'n_x': 287, 'n_y': 106, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Sat Apr 22 16:57:27 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170422LDL_L03.fsm'} ], 'STD': [Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[100.56563 100.43746 100.655556 ... 113.87019 130.67267 130.12047 ] [100.524345 100.473595 100.56737 ... 106.732666 122.84013 131.91829 ] [100.919174 100.96554 101.06892 ... 117.11189 118.59732 112.632675] ... [ 87.24211 86.936226 86.670456 ... 107.68202 119.20983 132.96117 ] [ 87.515755 87.62498 87.95406 ... 74.24934 82.53994 103.41158 ] [ 87.020775 87.24106 87.68801 ... 
78.700356 94.35523 113.71412 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': 'Image', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': -2266.249990810951, 'y_init': -1309.6249844779575, 'z_init': 4000.0, 'n_x': 75, 'n_y': 126, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Tue Apr 18 19:15:03 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170418STD_L01.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[101.894844 102.122215 102.37582 ... 108.37459 114.91064 124.42043 ] [101.98463 101.9201 102.075134 ... 120.49213 121.04182 108.73399 ] [101.70301 101.74385 101.854485 ... 110.620674 124.91049 144.94037 ] ... [ 84.49788 84.428 84.40194 ... 80.52712 92.84899 99.83099 ] [ 82.908844 82.956955 82.99871 ... 93.9605 84.04683 68.19029 ] [ 81.299225 81.46436 81.68425 ... 
80.492065 87.23297 106.00833 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170418STD_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 525.8750160187476, 'y_init': 3944.6250122934575, 'z_init': 4000.0, 'n_x': 130, 'n_y': 100, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Tue Apr 18 21:34:15 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170418STD_L03.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[104.16222 104.399055 104.81766 ... 129.78352 125.29896 127.66168 ] [104.1845 104.35017 104.59667 ... 100.570946 112.29748 104.5784 ] [104.23213 104.278755 104.478195 ... 101.34886 120.87445 120.509926] ... [106.33076 106.470024 106.73167 ... 109.113014 120.45274 127.805046] [106.257256 106.27672 106.47709 ... 104.11241 110.68616 145.02014 ] [106.32239 106.3202 106.47685 ... 
103.12103 103.059875 116.71471 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170419STD_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 5415.500024338563, 'y_init': 2364.500015397866, 'z_init': 4000.0, 'n_x': 197, 'n_y': 125, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Wed Apr 19 15:29:38 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170419STD_L02.fsm'} ], 'EPA': [Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[102.162506 102.20768 102.33103 ... 103.3998 133.57564 132.78831 ] [101.934006 101.820946 101.81872 ... 89.18643 82.32647 98.505684] [102.250984 101.996895 101.75608 ... 95.217316 99.56977 98.689926] ... [ 59.419476 59.40246 59.45253 ... 67.783035 73.247665 72.340904] [ 59.291172 59.275265 59.470284 ... 59.782402 66.921646 74.762665] [ 59.350204 59.409885 59.61621 ... 
67.860054 71.12531 94.13546 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170419EPA_L03', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 2986.250023593504, 'y_init': 96.00001490116061, 'z_init': 4000.0, 'n_x': 191, 'n_y': 121, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Wed Apr 19 11:01:00 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170419EPA_L03.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[105.09475 105.04887 105.366974 ... 107.11649 102.99424 91.08595 ] [104.76829 104.94343 105.12854 ... 114.303444 117.39385 105.55191 ] [103.317024 103.37587 103.6764 ... 103.2221 104.533195 105.97731 ] ... [103.61411 103.58757 103.67997 ... 78.18494 86.83749 94.49371 ] [102.8843 102.737976 102.95702 ... 123.78226 127.77606 128.58452 ] [102.098305 102.12563 102.436935 ... 
90.693794 89.5122 83.86093 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170420EPA_L01', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 3209.000012914339, 'y_init': 1993.875025952855, 'z_init': 4000.0, 'n_x': 105, 'n_y': 210, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Thu Apr 20 17:33:35 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170420EPA_L01.fsm'} , Spectrum: wavelength: [4000. 3998. 3996. ... 754. 752. 750.] amplitudes: [[ 95.9081 95.937256 95.97436 ... 103.952126 123.30863 149.24287 ] [ 94.429726 94.5522 94.72692 ... 78.5463 83.35162 103.35833 ] [ 95.19039 94.98259 95.091064 ... 112.38364 96.644714 93.23278 ] ... [105.68242 105.791405 106.04749 ... 91.40277 110.49822 134.17737 ] [105.587585 105.592026 105.73054 ... 98.074196 93.36444 100.17381 ] [105.33544 105.62715 105.80969 ... 
96.32086 100.53766 112.24974 ]] metadata: {'signature': b'PEPE', 'description': 'DataSet - 4DConst3DInterval\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'name': '20170421EPA_L02', 'x_delta': 6.249999751647323, 'y_delta': 6.249999751647323, 'z_delta': -2.0, 'z_start': 4000.0, 'z_end': 750.0, 'z_4d_start': 0.0, 'z_4d_end': 0.0, 'x_init': 3381.1250167638054, 'y_init': -1059.9999880790715, 'z_init': 4000.0, 'n_x': 136, 'n_y': 97, 'n_z': 1626, 'text1': 89, 'text2': 32, 'resolution': 26989, 'text3': 99, 'transmission': 28530, 'text4': 109, 'analyst': '', 'date': 'Fri Apr 21 09:55:23 2017', 'image_name': 'Default', 'instrument_model': 'Spotlight/Frontier FT-NIR', 'instrument_serial_number': '/94002', 'instrument_software_version': '00.01.0060/00.09.1061', 'accumulations': 16, 'detector': 'MCT Array ', 'source': 'MIR', 'beam_splitter': 'OptKBr', 'apodization': 'Filler', 'spectrum_type': 'Ratio (%T)', 'beam_type': 'Sample', 'phase_correction': 'Magnitude', 'ir_accessory': 'Spotlight 400-Enh', 'igram_type': 'Double', 'scan_direction': 'Unidirection', 'background_scans': 750, 'ir_laser_wave_number_unit': '%T', 'filename': '20170421EPA_L02.fsm'} ]}
# Build one metadata record per file from its base name, which is split as
# <8-character date><medium>_<slide>...; only the first three characters
# after the underscore are kept as the slide label.
records = []
for spec_path in filenames:
    base_name = path.basename(spec_path)
    stamp, slide_part = base_name.split('_')
    records.append({
        'filename': base_name,
        'date': stamp[:8],
        'medium': stamp[8:],
        'slide': slide_part[:3],
    })

metadata = pd.DataFrame(records)
print(metadata)
# Number of files per medium.
print(metadata['medium'].value_counts())
date filename medium slide 0 20170418 20170418STD_L01.fsm STD L01 1 20170418 20170418STD_L03.fsm STD L03 2 20170419 20170419EPA_L03.fsm EPA L03 3 20170419 20170419STD_L02.fsm STD L02 4 20170420 20170420EDL_L01.fsm EDL L01 5 20170420 20170420EPA_L01.fsm EPA L01 6 20170420 20170420LDL_L01.fsm LDL L01 7 20170421 20170421EDL_L02.fsm EDL L02 8 20170421 20170421EPA_L02.fsm EPA L02 9 20170421 20170421LDL_L02.fsm LDL L02 10 20170422 20170422LDL_L03.fsm LDL L03 11 20170424 20170424EDL_L03.fsm EDL L03 EPA 3 STD 3 EDL 3 LDL 3 Name: medium, dtype: int64
# Collect the loaded spectrum objects, and separately their amplitude arrays,
# in list_spec order.
#
# Both containers are created as explicit 1-D object arrays and filled one
# element at a time.  The amplitude arrays have a different number of rows per
# file, and recent NumPy versions refuse to build such a ragged array with a
# bare np.array([...]) call unless dtype=object is requested.
spectrum = np.empty(len(list_spec), dtype=object)
spectrumAmp = np.empty(len(list_spec), dtype=object)
for idx, sp in enumerate(list_spec.values()):
    spectrum[idx] = sp
    spectrumAmp[idx] = sp.amplitudes

print(type(spectrum[0]))

# Single-column frame holding the spectrum objects themselves.
df_spectrum = pd.DataFrame(spectrum)

# Number of rows in each file's amplitude array, then the container shape.
for s in spectrum:
    print(len(s.amplitudes))
print(spectrumAmp.shape)
<class 'specio.core.util.Spectrum'> 9450 13000 23111 24625 19089 22050 22960 14345 13192 25418 30422 23188 (12,)
# OLD VERSION
# spectrum = np.array([sp for key, sp in list_spec.items()])
# print(type(spectrum[0]))
# spectrumAmp=np.array([spAm.amplitudes for spAm in spectrum])
# df_spectrum = pd.DataFrame(spectrum)
# for s in spectrum:
# print(len(s.amplitudes))
# print(spectrumAmp.shape)
# Number of images (in file order) whose spectra are stacked into the table.
# NOTE(review): the value 4 was hard-coded in the original loop; presumably it
# bounds memory use, since every image contributes thousands of rows --
# confirm before raising it.
N_STACKED_IMAGES = 4
n_images = min(N_STACKED_IMAGES, len(spectrumAmp))

# The wavelength axis of the first loaded file labels the columns.
# Assumes all files share the same axis -- TODO confirm.
wavelength = list_spec[next(iter(list_spec))].wavelength

# Stack the amplitude rows of the selected images into one 2-D array.
# The empty float64 seed keeps the result float64 (as before) and keeps the
# call valid when no image is selected; concatenating once avoids re-copying
# the growing array for every image.
spectrum = np.concatenate(
    [np.zeros((0, len(wavelength)))] + [spectrumAmp[i] for i in range(n_images)]
)
df_spectrum = pd.DataFrame(spectrum, columns=wavelength)

# metadata holds one row per file, whereas df_spectrum holds one row per
# spectrum.  Repeat each file's metadata row once per spectrum of that file so
# both frames line up row by row; concatenating them directly left every row
# after the first len(metadata) ones with NaN labels and mislabelled the
# first rows.  This relies on metadata and spectrumAmp both following the
# order of filenames (dict insertion order).
pixel_counts = [len(spectrumAmp[i]) for i in range(n_images)]
row_of_file = np.repeat(np.arange(n_images), pixel_counts)
pixel_meta = (
    metadata[['date', 'filename', 'medium', 'slide']]
    .iloc[row_of_file]
    .reset_index(drop=True)
)

df_spectrum = pd.concat([pixel_meta, df_spectrum], axis='columns')
df = df_spectrum.set_index(['date', 'filename', 'medium', 'slide'])

# head must be called; printing the attribute dumps the bound method instead.
print(df.head())
<bound method NDFrame.head of 4000.0 3998.0 3996.0 \ date filename medium slide 20170418 20170418STD_L01.fsm STD L01 100.565628 100.437462 100.655556 20170418STD_L03.fsm STD L03 100.524345 100.473595 100.567368 20170419 20170419EPA_L03.fsm EPA L03 100.919174 100.965538 101.068916 20170419STD_L02.fsm STD L02 100.857765 100.687714 100.738014 20170420 20170420EDL_L01.fsm EDL L01 100.038307 100.001404 100.043625 20170420EPA_L01.fsm EPA L01 100.443130 100.286613 100.237091 20170420LDL_L01.fsm LDL L01 99.781319 99.782127 99.919327 20170421 20170421EDL_L02.fsm EDL L02 99.920059 99.922890 100.015640 20170421EPA_L02.fsm EPA L02 98.867813 98.909019 98.822746 20170421LDL_L02.fsm LDL L02 99.324081 99.301086 99.487846 20170422 20170422LDL_L03.fsm LDL L03 97.648727 97.473358 97.625854 20170424 20170424EDL_L03.fsm EDL L03 97.929924 98.087311 98.401871 NaN NaN NaN NaN 100.707558 100.897781 101.146111 NaN 100.523865 100.686882 101.014961 NaN 101.409744 101.463905 101.606628 NaN 101.298958 101.167229 101.249306 NaN 100.134201 100.167328 100.477051 NaN 101.181587 101.150063 101.210358 NaN 101.320335 101.342384 101.352371 NaN 101.108292 101.222267 101.403160 NaN 98.958916 98.936974 99.172264 NaN 96.649536 96.657967 96.780930 NaN 98.037086 97.986336 98.078285 NaN 98.633995 98.708939 98.927238 NaN 101.184662 101.305420 101.402985 NaN 102.573097 102.672607 102.744850 NaN 103.063278 103.227814 103.421783 NaN 103.116150 103.084473 103.313644 NaN 102.316795 102.298477 102.427322 NaN 100.783340 100.682831 100.949509 ... ... ... ... 
NaN 100.777260 100.811867 100.914032 NaN 102.247948 102.313698 102.731323 NaN 102.806313 102.853012 103.050240 NaN 102.898071 102.975212 103.350914 NaN 102.622520 102.641243 102.992310 NaN 102.800278 102.625954 102.720184 NaN 102.998772 103.173286 103.295296 NaN 102.566078 102.453514 102.457191 NaN 101.370506 101.531540 101.880219 NaN 100.370956 100.366791 100.437279 NaN 102.368912 102.361794 102.657280 NaN 103.767342 103.766953 104.029892 NaN 102.990990 102.826973 102.979378 NaN 102.646469 102.669449 102.939774 NaN 102.359398 102.274765 102.665016 NaN 103.968262 104.009453 104.161545 NaN 104.205429 104.196121 104.477318 NaN 103.134033 103.315300 103.592659 NaN 101.543167 101.688293 101.917000 NaN 102.990639 102.970367 102.994751 NaN 104.538132 104.676315 105.022194 NaN 104.057968 104.072235 104.313950 NaN 102.132980 102.182739 102.388458 NaN 103.876236 103.878395 104.080772 NaN 105.723640 105.937202 106.119095 NaN 105.846237 105.953285 106.168076 NaN 106.057655 106.088699 106.293068 NaN 106.330757 106.470024 106.731667 NaN 106.257256 106.276718 106.477089 NaN 106.322388 106.320198 106.476852 3994.0 3992.0 3990.0 \ date filename medium slide 20170418 20170418STD_L01.fsm STD L01 101.042213 101.127029 101.041862 20170418STD_L03.fsm STD L03 100.859825 101.011810 100.881363 20170419 20170419EPA_L03.fsm EPA L03 101.187706 101.240456 101.315536 20170419STD_L02.fsm STD L02 100.876060 101.003120 101.168861 20170420 20170420EDL_L01.fsm EDL L01 100.264618 100.468994 100.374901 20170420EPA_L01.fsm EPA L01 100.350197 100.373093 100.387566 20170420LDL_L01.fsm LDL L01 100.200516 100.322189 100.209435 20170421 20170421EDL_L02.fsm EDL L02 99.979538 99.937737 100.053764 20170421EPA_L02.fsm EPA L02 98.903679 99.061241 99.115631 20170421LDL_L02.fsm LDL L02 99.790405 100.009796 100.049370 20170422 20170422LDL_L03.fsm LDL L03 98.008072 98.248367 98.330795 20170424 20170424EDL_L03.fsm EDL L03 98.482269 98.331657 98.288475 NaN NaN NaN NaN 101.172783 101.122574 101.068527 NaN 101.265068 
101.418175 101.441475 NaN 101.688332 101.782608 101.840454 NaN 101.422386 101.529556 101.549568 NaN 100.653099 100.691956 100.718109 NaN 101.335983 101.351372 101.378624 NaN 101.296646 101.226707 101.358971 NaN 101.438881 101.384811 101.409660 NaN 99.350510 99.318413 99.282066 NaN 96.880531 96.841644 96.969887 NaN 98.170647 98.209190 98.266426 NaN 98.998413 98.803551 98.630127 NaN 101.494896 101.456848 101.366470 NaN 102.736801 102.739510 102.837883 NaN 103.618141 103.686699 103.570320 NaN 103.545265 103.581879 103.541756 NaN 102.606453 102.640564 102.613075 NaN 101.157631 101.114922 101.183067 ... ... ... ... NaN 101.101219 101.146675 101.119804 NaN 103.064972 103.087975 103.013954 NaN 103.156837 103.198341 103.283920 NaN 103.603951 103.672760 103.898476 NaN 103.319633 103.326538 103.366898 NaN 102.986526 103.020821 102.987846 NaN 103.319916 103.391968 103.650070 NaN 102.541862 102.734886 102.966606 NaN 102.053375 102.041237 102.092140 NaN 100.614357 100.803871 100.943771 NaN 103.046471 103.248253 103.228333 NaN 104.169853 104.152046 104.284393 NaN 103.177605 103.156570 103.172562 NaN 103.141785 103.073181 103.019325 NaN 103.146568 103.430466 103.598946 NaN 104.353561 104.509460 104.497269 NaN 104.772896 104.916801 104.975418 NaN 103.792297 103.807617 103.782814 NaN 102.016884 102.050018 102.049782 NaN 103.211342 103.394142 103.503998 NaN 105.421814 105.566093 105.452629 NaN 104.513428 104.505569 104.538742 NaN 102.558014 102.557755 102.636276 NaN 104.273811 104.315712 104.451172 NaN 106.187050 106.149406 106.050110 NaN 106.332436 106.404892 106.552223 NaN 106.691406 107.103096 107.253860 NaN 106.943954 107.062592 107.098076 NaN 106.683800 106.905350 107.017441 NaN 106.709534 106.852783 106.942635 3988.0 3986.0 3984.0 \ date filename medium slide 20170418 20170418STD_L01.fsm STD L01 100.910751 100.865280 100.962021 20170418STD_L03.fsm STD L03 100.574974 100.511772 100.669250 20170419 20170419EPA_L03.fsm EPA L03 101.130440 100.767326 100.745483 20170419STD_L02.fsm 
STD L02 101.041557 100.821655 100.896675 20170420 20170420EDL_L01.fsm EDL L01 100.055763 99.954872 100.125397 20170420EPA_L01.fsm EPA L01 100.381561 100.376938 100.457397 20170420LDL_L01.fsm LDL L01 100.021652 99.910301 99.854286 20170421 20170421EDL_L02.fsm EDL L02 100.077660 99.982674 99.839264 20170421EPA_L02.fsm EPA L02 98.796257 98.382439 98.376617 20170421LDL_L02.fsm LDL L02 99.777679 99.586403 99.624626 20170422 20170422LDL_L03.fsm LDL L03 98.171387 97.917404 97.820259 20170424 20170424EDL_L03.fsm EDL L03 98.131828 97.962547 98.051163 NaN NaN NaN NaN 100.795059 100.564850 100.699104 NaN 101.115059 100.809456 100.893379 NaN 101.699097 101.661537 101.830284 NaN 101.390434 101.261772 101.378426 NaN 100.511581 100.294182 100.397125 NaN 101.270973 101.156845 101.256187 NaN 101.380615 101.311760 101.436279 NaN 101.434792 101.451614 101.458961 NaN 99.134193 99.009178 99.103783 NaN 97.063652 96.950272 96.899689 NaN 98.125877 98.046371 98.251450 NaN 98.455757 98.321823 98.540588 NaN 101.267822 101.237198 101.302116 NaN 102.739700 102.484962 102.409019 NaN 103.238609 103.078979 103.230988 NaN 103.394341 103.272560 103.316582 NaN 102.413216 102.069626 102.015404 NaN 101.117531 100.980415 101.089264 ... ... ... ... 
NaN 100.978111 100.748283 100.718498 NaN 102.831612 102.727715 102.932297 NaN 103.087715 102.788193 102.853317 NaN 103.860626 103.579712 103.549484 NaN 103.366478 103.199524 103.117081 NaN 102.901306 102.910378 103.046501 NaN 103.538994 103.199615 103.242645 NaN 102.842705 102.528267 102.578934 NaN 101.882034 101.550720 101.521309 NaN 100.809067 100.624687 100.647217 NaN 102.857292 102.508919 102.616096 NaN 104.121857 103.679535 103.665115 NaN 103.113266 103.075020 103.266174 NaN 102.884338 102.757912 102.936836 NaN 103.413574 103.101234 103.070282 NaN 104.206413 104.012299 104.179771 NaN 104.719467 104.321556 104.263184 NaN 103.485748 103.117172 103.178848 NaN 101.703392 101.331001 101.460556 NaN 103.404640 103.125984 103.024109 NaN 105.042053 104.697525 104.769348 NaN 104.496307 104.325920 104.499146 NaN 102.570503 102.329849 102.377327 NaN 104.364967 104.014046 104.025848 NaN 105.791168 105.635605 105.882988 NaN 106.526405 106.292870 106.261147 NaN 106.931076 106.535164 106.412582 NaN 106.924377 106.762016 106.858765 NaN 106.706062 106.422119 106.552071 NaN 106.916428 106.779396 106.810669 3982.0 ... 768.0 \ date filename medium slide ... 20170418 20170418STD_L01.fsm STD L01 101.082581 ... 97.847168 20170418STD_L03.fsm STD L03 100.680412 ... 88.620834 20170419 20170419EPA_L03.fsm EPA L03 100.949631 ... 97.180229 20170419STD_L02.fsm STD L02 100.990318 ... 97.403694 20170420 20170420EDL_L01.fsm EDL L01 100.189461 ... 91.867386 20170420EPA_L01.fsm EPA L01 100.403809 ... 91.011871 20170420LDL_L01.fsm LDL L01 99.849792 ... 92.671143 20170421 20170421EDL_L02.fsm EDL L02 99.756645 ... 89.964493 20170421EPA_L02.fsm EPA L02 98.610786 ... 104.043015 20170421LDL_L02.fsm LDL L02 99.642006 ... 95.606041 20170422 20170422LDL_L03.fsm LDL L03 97.902710 ... 92.277618 20170424 20170424EDL_L03.fsm EDL L03 98.105408 ... 96.227180 NaN NaN NaN NaN 100.895401 ... 108.243217 NaN 101.052010 ... 93.163918 NaN 101.912880 ... 104.118797 NaN 101.407669 ... 87.206390 NaN 100.556946 ... 
94.742813 NaN 101.448196 ... 90.703300 NaN 101.612427 ... 96.453201 NaN 101.401611 ... 91.815041 NaN 99.291458 ... 92.194069 NaN 96.914688 ... 82.858711 NaN 98.296463 ... 94.943924 NaN 98.793625 ... 87.688866 NaN 101.376289 ... 101.239548 NaN 102.508377 ... 86.944206 NaN 103.374344 ... 99.760765 NaN 103.286270 ... 89.517639 NaN 102.244118 ... 96.053947 NaN 101.146576 ... 88.004372 ... ... ... ... NaN 100.902367 ... 87.174370 NaN 103.077293 ... 100.824036 NaN 103.035210 ... 90.893433 NaN 103.518913 ... 92.329071 NaN 103.154640 ... 89.028191 NaN 103.086075 ... 98.479439 NaN 103.512543 ... 82.885292 NaN 102.883621 ... 92.459297 NaN 101.629356 ... 81.153015 NaN 100.730591 ... 92.652802 NaN 102.881889 ... 87.379326 NaN 103.981987 ... 95.537773 NaN 103.372910 ... 87.237984 NaN 103.144638 ... 83.719475 NaN 103.168159 ... 93.469635 NaN 104.433418 ... 93.624466 NaN 104.366058 ... 87.932289 NaN 103.468414 ... 94.257248 NaN 101.780052 ... 85.009865 NaN 103.172470 ... 77.939697 NaN 104.979858 ... 81.837387 NaN 104.841652 ... 94.542046 NaN 102.612976 ... 99.870026 NaN 104.471138 ... 93.032730 NaN 106.185516 ... 84.111847 NaN 106.413635 ... 94.299927 NaN 106.549843 ... 79.366203 NaN 106.876579 ... 94.780693 NaN 106.708603 ... 86.119209 NaN 106.930885 ... 
89.656952 766.0 764.0 762.0 \ date filename medium slide 20170418 20170418STD_L01.fsm STD L01 99.934502 98.916954 97.683571 20170418STD_L03.fsm STD L03 88.410263 88.908142 88.539413 20170419 20170419EPA_L03.fsm EPA L03 97.375801 102.663254 114.905739 20170419STD_L02.fsm STD L02 91.576401 89.823914 91.289764 20170420 20170420EDL_L01.fsm EDL L01 87.589760 87.946770 98.988518 20170420EPA_L01.fsm EPA L01 92.567955 86.593269 83.686249 20170420LDL_L01.fsm LDL L01 97.254547 100.758766 100.427223 20170421 20170421EDL_L02.fsm EDL L02 90.353584 97.386131 101.835999 20170421EPA_L02.fsm EPA L02 105.049744 101.846985 96.491173 20170421LDL_L02.fsm LDL L02 87.655197 84.711105 86.612267 20170422 20170422LDL_L03.fsm LDL L03 89.417854 90.469185 94.064812 20170424 20170424EDL_L03.fsm EDL L03 94.055908 97.769157 100.939896 NaN NaN NaN NaN 103.757820 98.143608 102.016747 NaN 93.197144 89.236572 85.776962 NaN 104.342163 112.134460 119.816986 NaN 87.721451 88.294357 91.709450 NaN 100.480103 101.994179 107.915695 NaN 92.288826 91.804420 84.179291 NaN 92.578758 91.326553 91.119362 NaN 95.910400 99.541359 94.143921 NaN 96.189926 101.902031 104.882935 NaN 86.562057 89.648300 91.527390 NaN 94.446526 96.256729 95.144218 NaN 83.453026 83.650505 87.118317 NaN 102.612473 100.282814 96.952637 NaN 89.071732 93.803757 99.715996 NaN 99.210052 104.921722 105.551941 NaN 89.289810 88.998016 91.661736 NaN 103.091301 117.383934 121.030472 NaN 86.677589 90.121895 98.115494 ... ... ... ... 
NaN 91.290176 95.661148 94.256355 NaN 96.268364 92.197662 97.155876 NaN 92.403267 94.108315 88.985130 NaN 92.321854 95.362221 96.325684 NaN 95.365204 95.285606 95.824982 NaN 96.746750 100.504051 98.223961 NaN 79.937317 80.184105 82.810097 NaN 84.320915 83.680252 92.836510 NaN 81.095695 82.739487 85.033768 NaN 97.657761 96.038300 91.474518 NaN 85.903908 79.372910 78.047264 NaN 98.187317 92.996117 85.032242 NaN 97.028976 102.408615 98.007240 NaN 89.724762 95.743675 97.689003 NaN 92.720810 87.362144 88.195557 NaN 96.190445 100.837036 98.078812 NaN 86.791588 90.860252 87.718681 NaN 87.585037 79.162239 79.784500 NaN 84.847832 86.460388 91.257790 NaN 79.889091 84.901070 95.860443 NaN 81.122726 83.827202 84.746201 NaN 88.900993 91.196976 96.487717 NaN 100.818489 94.008492 92.473572 NaN 93.969879 86.749847 71.764000 NaN 82.870277 83.061073 85.660034 NaN 84.158897 82.856895 87.493027 NaN 84.372948 89.287621 91.708710 NaN 106.678513 111.632858 100.797249 NaN 78.669502 78.765411 83.522842 NaN 96.044250 103.532494 101.821732 760.0 758.0 756.0 \ date filename medium slide 20170418 20170418STD_L01.fsm STD L01 92.842155 96.027298 96.088387 20170418STD_L03.fsm STD L03 86.191490 84.405281 87.334167 20170419 20170419EPA_L03.fsm EPA L03 118.503342 116.997360 119.955429 20170419STD_L02.fsm STD L02 88.808784 95.976746 111.838791 20170420 20170420EDL_L01.fsm EDL L01 107.015526 107.126251 110.778641 20170420EPA_L01.fsm EPA L01 85.868645 97.246002 110.951599 20170420LDL_L01.fsm LDL L01 96.501488 95.518997 95.328941 20170421 20170421EDL_L02.fsm EDL L02 95.810768 91.124649 94.859505 20170421EPA_L02.fsm EPA L02 94.230507 103.275208 109.447952 20170421LDL_L02.fsm LDL L02 80.581963 82.191971 89.463554 20170422 20170422LDL_L03.fsm LDL L03 95.721458 89.059097 89.150780 20170424 20170424EDL_L03.fsm EDL L03 94.171326 84.503754 87.896156 NaN NaN NaN NaN 110.541435 112.555496 112.099350 NaN 86.177628 86.120354 80.234840 NaN 122.106201 128.222351 130.554031 NaN 96.725685 95.878967 106.070190 NaN 
109.565353 100.807991 98.328598 NaN 78.364723 81.415543 83.699532 NaN 85.425476 82.000771 88.244247 NaN 88.442635 89.258858 89.521736 NaN 102.331520 104.790810 105.319313 NaN 95.512703 96.776672 92.712936 NaN 96.771492 102.043358 105.163612 NaN 92.217873 93.079643 94.566597 NaN 101.188896 102.033966 94.247391 NaN 102.025940 100.503807 100.743172 NaN 96.492523 96.473763 110.565521 NaN 97.152466 101.936752 103.548485 NaN 110.498100 100.448959 96.733490 NaN 106.968147 99.374992 85.719589 ... ... ... ... NaN 88.306602 86.367424 84.838478 NaN 105.982300 109.171600 101.042694 NaN 83.297440 84.626945 87.911278 NaN 97.015106 94.567345 86.227242 NaN 103.954407 103.351685 96.190948 NaN 91.974136 100.171455 111.190247 NaN 82.426643 85.251747 84.637634 NaN 102.717896 107.364151 111.667023 NaN 83.235626 86.032768 97.710838 NaN 100.250175 114.536087 114.504799 NaN 87.425621 95.255112 93.280022 NaN 84.818810 91.358185 104.639030 NaN 103.403496 110.116470 97.084328 NaN 93.761063 95.600655 101.437691 NaN 94.011917 94.566177 87.154015 NaN 96.323486 102.080765 103.145172 NaN 86.979218 92.121437 93.472984 NaN 95.074821 106.094154 108.195168 NaN 88.581253 80.782463 91.900406 NaN 101.654381 102.141472 109.681610 NaN 88.912285 89.839912 99.810211 NaN 98.597908 97.906975 103.217842 NaN 103.089981 103.564880 99.391121 NaN 71.467560 85.254601 97.048798 NaN 85.424843 77.223602 79.307907 NaN 91.339355 101.145294 102.862724 NaN 91.835510 85.769707 83.440140 NaN 87.645233 91.677406 101.393486 NaN 94.490059 97.960152 99.420647 NaN 98.119583 106.574509 114.233269 754.0 752.0 750.0 date filename medium slide 20170418 20170418STD_L01.fsm STD L01 113.870193 130.672668 130.120468 20170418STD_L03.fsm STD L03 106.732666 122.840134 131.918289 20170419 20170419EPA_L03.fsm EPA L03 117.111893 118.597321 112.632675 20170419STD_L02.fsm STD L02 112.974892 95.770256 90.075630 20170420 20170420EDL_L01.fsm EDL L01 111.859688 124.256477 139.328354 20170420EPA_L01.fsm EPA L01 106.214729 95.746674 102.821426 
20170420LDL_L01.fsm LDL L01 105.457169 120.860954 134.041229 20170421 20170421EDL_L02.fsm EDL L02 91.000809 98.432159 115.355316 20170421EPA_L02.fsm EPA L02 104.784416 98.080559 112.955231 20170421LDL_L02.fsm LDL L02 98.798721 114.556267 129.300308 20170422 20170422LDL_L03.fsm LDL L03 96.951324 104.087776 129.143875 20170424 20170424EDL_L03.fsm EDL L03 93.599846 90.318748 102.521927 NaN NaN NaN NaN 120.949921 134.045258 134.507690 NaN 77.837006 85.999451 96.042374 NaN 126.487251 122.507408 125.694550 NaN 114.774338 110.014786 108.176598 NaN 116.974342 136.313232 142.106888 NaN 86.489349 89.476707 110.592361 NaN 98.255325 115.094772 112.032875 NaN 89.980728 95.863831 133.690338 NaN 92.559456 93.372200 111.725449 NaN 89.670258 104.136192 129.500397 NaN 102.873634 88.581467 93.870941 NaN 99.420502 95.210159 109.786858 NaN 86.744728 92.390434 110.039902 NaN 96.428345 91.777588 102.133469 NaN 115.881119 111.549980 124.439857 NaN 108.461761 114.816826 103.633095 NaN 107.314552 127.992340 144.923645 NaN 87.516823 83.144257 79.988937 ... ... ... ... 
NaN 80.131302 91.170433 110.405586 NaN 102.089340 103.251915 100.954506 NaN 100.101814 106.612007 115.218018 NaN 91.454987 109.662926 130.148880 NaN 87.868607 86.699188 100.751404 NaN 111.961784 118.116463 139.369934 NaN 88.090385 96.863525 116.877663 NaN 113.579231 118.856903 126.939423 NaN 103.149506 97.596558 92.920403 NaN 110.349564 114.752075 126.208321 NaN 102.702995 114.197426 110.894417 NaN 108.639786 108.381905 120.923454 NaN 91.397369 94.453857 99.098465 NaN 104.611053 121.836426 144.519989 NaN 83.709595 84.987785 98.436295 NaN 109.984978 107.802322 108.511925 NaN 80.918518 69.891281 87.796272 NaN 106.367905 109.786850 101.845345 NaN 101.259224 106.773705 124.803596 NaN 117.546150 120.042358 127.797256 NaN 101.262184 97.645584 110.235054 NaN 116.192940 124.565170 137.355240 NaN 93.998566 83.536140 85.828781 NaN 98.797340 100.294891 110.495911 NaN 97.077164 110.824722 118.402489 NaN 110.327568 126.226738 148.152344 NaN 98.902176 105.689407 98.046310 NaN 109.113014 120.452744 127.805046 NaN 104.112411 110.686157 145.020142 NaN 103.121033 103.059875 116.714706 [70186 rows x 1626 columns]>
import matplotlib.pyplot as plt
%matplotlib inline
# Average the rows of df within each 'medium' group and draw one curve per
# group (the transpose puts the groups in columns, which plt.plot draws as
# separate lines).
mean_by_medium = df.groupby('medium').mean()
plt.plot(mean_by_medium.T)
plt.show()
# Fit a PCA model with six components on `spectrum`.
n_pca_components = 6
pca = PCA(n_components=n_pca_components)
pca.fit(spectrum)
PCA(copy=True, iterated_power='auto', n_components=6, random_state=None, svd_solver='auto', tol=0.0, whiten=False)
# list_spec is indexed by the keys it yields when iterated, so the wavelength
# axis is taken from the first entry rather than from position 0.
first_key = next(iter(list_spec))
wavelength = list_spec[first_key].wavelength

# Loadings of the first six principal components along the wavelength axis.
component_loadings = pca.components_[:6]
plt.plot(wavelength, component_loadings.T)
plt.show()

# Explained-variance ratio of the first five components: plotted, then printed.
variance_ratio = pca.explained_variance_ratio_[0:5]
plt.plot(variance_ratio)
plt.show()
print(variance_ratio)
[0.71655348 0.2016908 0.02776846 0.02209929 0.01167137]
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Whitened PCA fitted directly on `spectrum`. Only the fitted model is needed
# from this step (its components_ are plotted afterwards), so fit() is used
# instead of fit_transform(): the transformed scores were immediately
# overwritten by the pipeline result below.
pca = PCA(n_components=6, whiten=True)
pca.fit(spectrum)

# Scores kept in X_r: every column of `spectrum` is standardised before a
# whitened six-component PCA is applied.
pca_norm = make_pipeline(StandardScaler(), PCA(n_components=6, whiten=True))
X_r = pca_norm.fit_transform(spectrum)

# NOTE(review): X_r comes from the standardised pipeline, whereas `pca` is
# fitted on the unstandardised data, so loadings read from `pca` do not
# describe the axes of X_r. Confirm which of the two models is intended.
print(X_r.shape)
(70186, 6)
# One figure per principal component: the component's loadings against the
# wavelength axis, with a shaded grey band between two x positions.
# NOTE(review): the band limits are hard-coded per component; the reason for
# each range is not visible here.
highlight_spans = [
    (1000, 1120),  # component 0
    (750, 898),    # component 1
    (750, 898),    # component 2
    (750, 898),    # component 3
    (2400, 2450),  # component 4
    (1000, 1200),  # component 5
]
for component_idx, (span_start, span_end) in enumerate(highlight_spans):
    plt.plot(wavelength, pca.components_[component_idx].T)
    plt.axvspan(span_start, span_end, alpha=0.2, color='grey')
    plt.show()
# Scatter plot of the first two score columns of X_r, one colour per medium.
#
# The boolean mask selects rows of X_r, so there must be exactly one medium
# label per row of X_r. A mask built from a shorter `metadata` table fails
# inside the loop with "IndexError: boolean index did not match indexed array
# along dimension 0", so the alignment is checked once, up front, and reported
# with an actionable message.
medium_labels = np.asarray(metadata['medium'])
if medium_labels.shape[0] != X_r.shape[0]:
    raise ValueError(
        "metadata['medium'] has {} entries but X_r has {} rows; expand the "
        "medium labels to one entry per row of X_r before plotting".format(
            medium_labels.shape[0], X_r.shape[0]))

for medium in metadata['medium'].unique():
    mask = medium_labels == medium
    plt.scatter(X_r[mask, 0], X_r[mask, 1],
                label=medium, s=5, alpha=0.7)
plt.legend()
plt.show()
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-37-b85e9a59e20e> in <module>() 1 for medium in metadata['medium'].unique() : 2 mask = metadata['medium'] == medium ----> 3 plt.scatter(X_r[mask, 0], X_r[mask, 1], 4 label=medium, s=5, alpha=0.7) 5 plt.legend() IndexError: boolean index did not match indexed array along dimension 0; dimension is 70186 but corresponding boolean dimension is 12
# Unlabelled scatter of the first two score columns of X_r.
first_scores = X_r[:, 0]
second_scores = X_r[:, 1]
plt.scatter(first_scores, second_scores)
plt.show()

# Bare expression: in a notebook this renders df_spectrum as the cell output.
df_spectrum