RE: [Berkeley-Access-Database-Users-Group] Major import problem...

From: Jon S.
Sent on: Sunday, October 14, 2012 9:26 AM

Thanks so much, Dan… I’m instrumenting the code to see exactly where it occurs, and also getting my hands on the file(s) causing the problem, so we’ll see what happens.

 

Best…jon

 

Jon Seidel, CMC®

Effective Decisions... Priceless!

 

From: [address removed] [mailto:[address removed]] On Behalf Of Dan Stern
Sent: Thursday, October 11,[masked]:39 PM
To: [address removed]
Subject: Re: [Berkeley-Access-Database-Users-Group] Major import problem...

 

Hi Jon,

Here is what I have. It's for tab-delimited inputs that sometimes stretch across more than one line. Also, this copy doesn't have something I added later, which closed and reopened the SQL recordset every 100K records or so. Otherwise, ADO seeks forever trying to find the end of file when the number of records gets large; it hangs, and you'll see Access memory usage expand until it runs out. LogEvent is a specialized function that writes errors to an event log. You need to set a reference to Microsoft Scripting Runtime.
- Dan

Private bConcatenating As Boolean  'Module-level flag shared with GetLine: True while stitching a quoted field that spans lines

Public Function ImportFile(sTable As String, sFile As String) As Boolean  'Copyright 2012 Dan Stern www.dansternsystems.com
    Dim FSO As New FileSystemObject, lFileSize As Long, sDateModified As String
    With FSO.GetFile(sFile)
        lFileSize = .Size
        sDateModified = .DateLastModified
    End With
    If Int(CDate(sDateModified)) <> Int(Date) Then
        LogEvent "Data appears to have not run tonight, based on file date for " & _
         sFile & " of " & sDateModified, -400, 1
    End If
    LogEvent "Importing file " & sFile & " created " & sDateModified & " into table " & sTable, 0, 2
    Dim sLine As String, sLine2 As String, vSplit As Variant, iUB As Integer, iField As Integer, iPos As Integer
    Dim lRec As Long, lLine As Long, iQuote As Integer, vFields As Variant, iFields As Integer, iFieldsToCopy As Integer, sData As String
    Dim rs As New ADODB.Recordset, sSQL As String, sTableB As String, iField2 As Integer, lBytesRead As Long
    'On Error GoTo ImportFileError
    Dim iFile As Integer
    iFile = FreeFile
    Open sFile For Input As #iFile
    If Not EOF(iFile) Then
        GetLine sLine, lLine, iFile, lBytesRead
        vFields = Split(sLine, vbTab)
        iFields = UBound(vFields)
    End If
   
    For iField = 1 To iFields  'Look for duplicate column names and rename later duplicates. Could use a collection too.
        For iField2 = 0 To iField - 1
            If vFields(iField) = vFields(iField2) Then
                vFields(iField) = vFields(iField) & "1"
            End If
        Next iField2
    Next iField
   
    sTableB = Bracketize(sTable)
    sSQL = "IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID('" & sTableB & "') AND type in ('U')) " & vbCrLf & _
        " DROP TABLE " & sTableB
    CurrentProject.Connection.Execute sSQL
    sSQL = "CREATE TABLE " & sTableB & " (" & vbCrLf & _
        "     [ID] [int] IDENTITY(1,1) NOT NULL, " & vbCrLf
    For iField = 0 To iFields
        sSQL = sSQL & Bracketize(CStr(vFields(iField))) & " varchar(255) NULL, " & vbCrLf
    Next iField
'Index created after import to speed up; 9/13/2011 DMS
'        sSQL = sSQL & " CONSTRAINT [IX_" & sTable & "] UNIQUE NONCLUSTERED " & vbCrLf & _
'         "( " & vbCrLf & _
'         "[ID] Asc " & _
'         ")WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY] "
         sSQL = sSQL & " ) ON [PRIMARY] "
    CurrentProject.Connection.Execute sSQL
    sSQL = sTableB
    rs.Open sSQL, CurrentProject.Connection, adOpenDynamic, adLockOptimistic, adCmdTable
    If rs.Fields.Count <> iFields + 2 Then
        LogEvent iFields + 2 & " fields in input header but " & rs.Fields.Count & " fields in table " & sTable, -2, 2
        GoTo CloseAll
    End If
    Do While Not EOF(iFile)
        bConcatenating = False
        ' **** Read each line ****
        GetLine sLine, lLine, iFile, lBytesRead
        iQuote = InStr(sLine, Chr(34))  'Look for fields that have cr's embedded in them with quotation marks delineating them.
        If iQuote > 0 Then 'A quote found
            bConcatenating = True
            iQuote = InStr(iQuote + 1, sLine, Chr(34)) 'Look for a 2nd quote in the same line
            If iQuote = 0 Then  'No second quote, so assume the next line has it
                GetLine sLine2, lLine, iFile, lBytesRead 'Grab that next line
                sLine = sLine & " " & sLine2 'Concatenate the next line
                iQuote = InStr(sLine2, Chr(34)) 'Look for quote in 2nd line
                If iQuote = 0 Then 'No quote in 2nd line,
                    GetLine sLine2, lLine, iFile, lBytesRead  'so grab 3rd line for continuation
                    sLine = sLine & sLine2 'concatenate
                    iQuote = InStr(sLine2, Chr(34)) 'Look for quote in 3rd line
                    If iQuote = 0 Then 'No quote in 3rd line,
                        GetLine sLine2, lLine, iFile, lBytesRead   'so grab a 4th line
                        sLine = sLine & sLine2 'concatenate
                        iQuote = InStr(sLine2, Chr(34)) 'Look for quote in 4th line
                        If iQuote = 0 Then LogEvent "Possibly more than 4 lines in a field at line " & lLine & ": " & left(sLine2, 80), -1, 3
                    End If
                End If
            End If
            sLine = Replace(sLine, Chr(34), "")  'Strip the quotes from the assembled line
        End If
        lRec = lRec + 1
        If lLine Mod 1000 = 0 Then
            ShowMsg ("Importing to table " & sTable & ": " & lLine & " lines read; " & lRec & " Recs read. " & Format(lBytesRead / lFileSize, "#.00%"))
        End If
        vSplit = Split(sLine, vbTab) 'Split the line into a tab delimited array
        iUB = UBound(vSplit)
        If iFields <> iUB Then
            LogEvent iFields & " fields in input header but " & iUB & " fields in record " & lRec & ": " & left(sLine, 80), -3, 3
        End If
        If iFields > iUB Then  'Hopefully are equal, but if they don't match, at least try
            iFieldsToCopy = iUB
        Else
            iFieldsToCopy = iFields
        End If
        If lLine Mod 10000 = 0 Then  'Close and reopen the recordset periodically; without this, ADO couldn't handle more than about 250K records
            rs.Close
            sSQL = sTableB
            rs.Open sSQL, CurrentProject.Connection, adOpenDynamic, adLockOptimistic, adCmdTable
        End If
        With rs
             .AddNew
            For iField = 0 To iFieldsToCopy
                sData = vSplit(iField)
                If Len(sData) > 255 Then
                    LogEvent "Record: " & lRec & ", Field: " & vFields(iField) & " is longer than 255 chars.", -4, 3
                End If
                .Fields(vFields(iField)) = left(sData, 255)
            Next iField
            .Update
         End With
    Loop
'Index created after import to speed up; 9/13/2011 DMS
        sSQL = "ALTER TABLE " & sTableB & " ADD CONSTRAINT [IX_" & sTable & "] UNIQUE NONCLUSTERED " & vbCrLf & _
         "( " & vbCrLf & _
         "[ID] Asc " & _
         ")WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY] "
    LogEvent "Building index for " & sTable, 0, 3
    CurrentProject.Connection.Execute sSQL
    LogEvent sTable & " Completed: " & lLine & " lines read; " & lRec & " Recs read", 0, 3
CloseAll:
    On Error Resume Next
    rs.Close
    Set rs = Nothing
    Close #iFile

End Function

Public Function GetLine(ByRef sLine As String, ByRef lLine As Long, ByVal iFile As Integer, ByRef lBytes As Long) 'Copyright 2012 Dan Stern www.dansternsystems.com
   If Not EOF(iFile) Then Line Input #iFile, sLine:   lLine = lLine + 1: lBytes = lBytes + Len(sLine)
   Dim iFirstQuote As Integer, sLineNoQuotes As String, lDiff As Long
   Do While Len(sLine) = 0 And Not EOF(iFile)
       Line Input #iFile, sLine:   lLine = lLine + 1: lBytes = lBytes + Len(sLine)
   Loop
   sLineNoQuotes = Replace(sLine, Chr(34), "")
   lDiff = Len(sLine) - Len(sLineNoQuotes)  'Handle quoted items in the middle of a split line. lDiff tells how many quotes in line
   If lDiff > 1 Then
        If lDiff Mod 2 = 0 Then  'If an even number of quotes, then they are balanced and ignore them all.
            sLine = sLineNoQuotes
        Else 'If odd and > 1: keep the last quote if we're concatenating; otherwise keep the first one only.
            If bConcatenating Then
                sLine = Replace(sLine, Chr(34), "", 1, lDiff - 1) 'Replace all but the last occurrence
            Else 'Keep the first occurrence and strip the rest. Replace() returns the string starting at Start, so re-prepend the left portion.
                iFirstQuote = InStr(1, sLine, Chr(34)) 'Position of first occurrence of a quote
                sLine = Left(sLine, iFirstQuote) & Replace(sLine, Chr(34), "", iFirstQuote + 1)
            End If
        End If
   End If
End Function

Public Function Bracketize(ByVal sItem As String) As String  'Copyright 2012 Dan Stern www.dansternsystems.com
    sItem = Replace(sItem, "[", "")
    sItem = Replace(sItem, "]", "")
    Bracketize = "[" & sItem & "]"
End Function
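
You'd call it along these lines (table name and file path here are just examples):

    'Hypothetical usage, e.g. from a weekly import routine:
    If Not ImportFile("tmpPayroll", "C:\Exports\payroll.txt") Then
        MsgBox "Import failed; check the event log."
    End If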

 


From: Jon Seidel <[address removed]>
To: [address removed]
Sent: Thursday, October 11,[masked]:54 PM
Subject: RE: [Berkeley-Access-Database-Users-Group] Major import problem...

 

Thanks Dan/Cory for your great feedback… excellent!

 

Dan… Is your source code available for that routine?

 

Thanks…jon

 

Jon Seidel, CMC®

Effective Decisions... Priceless!

 

From: [address removed] [mailto:[address removed]] On Behalf Of Dan Stern
Sent: Thursday, October 11,[masked]:57 PM
To: [address removed]
Subject: Re: [Berkeley-Access-Database-Users-Group] Major import problem...

 

I've run into limits importing from text files into SQL Server via MS-Access using DoCmd.Transfer..., so I've built custom VBA routines that parse each line and write record by record to SQL Server using ADO. Then I ran into ADO limitations: if Access tries to write too much data without closing and reopening the output recordset, it crashes. So I had to make my routine close and reopen every so often. I think it was around the order of magnitude of data that you're talking about.
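
For reference, here is the close-and-reopen pattern on its own: a minimal sketch, assuming an existing table named tmpImport; HaveMoreData and the field assignments are hypothetical stand-ins for whatever parser you use.

Dim rs As New ADODB.Recordset, lRec As Long
rs.Open "tmpImport", CurrentProject.Connection, adOpenDynamic, adLockOptimistic, adCmdTable
Do While HaveMoreData()  'HaveMoreData is a hypothetical stand-in for your line parser
    rs.AddNew
    '... assign rs.Fields from the parsed line here ...
    rs.Update
    lRec = lRec + 1
    If lRec Mod 10000 = 0 Then  'Close and reopen periodically so ADO doesn't bog down on big loads
        rs.Close
        rs.Open "tmpImport", CurrentProject.Connection, adOpenDynamic, adLockOptimistic, adCmdTable
    End If
Loop
rs.Close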

- Dan

 

 


From: Cory Nott <[address removed]>
To: [address removed]
Sent: Thursday, October 11,[masked]:18 AM
Subject: RE: [Berkeley-Access-Database-Users-Group] Major import problem...

 

My experience with imports has been that the post-processing is usually not the problem. Once Access has the table and can read it end to end, it won’t choke on anything except poorly constructed queries. If the file won’t import, or is not fully accessible, then it’s likely a problem in the ISAM (the libraries that implement the import methods for different non-database file formats), perhaps running out of memory or encountering unexpected data. Another fun fact about Access: it reads only the first few rows of a data file to determine the data types of the columns. If you aren’t using an import specification, a column that Access decides is numeric but that turns out to contain text later on can cause a failure.
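
One way to take the guesswork out when the Text ISAM is doing the import is a Schema.ini file in the same folder as the data file. Here's a sketch; the file name and column names are made up:

[payroll.txt]
Format=TabDelimited
ColNameHeader=True
MaxScanRows=0
Col1=EmployeeID Text Width 255
Col2=PayDate Text Width 255
Col3=Amount Text Width 255

MaxScanRows=0 tells the driver to scan the whole file when guessing types, and declaring each column Text explicitly sidesteps the numbers-now, text-later failure entirely.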

 

Check your debug error. If it’s “Error in Installable ISAM”, you have an issue with either the size, the accessibility (as you point out below), or unexpected/corrupted data in the file.

 

Cory

 

From: [address removed] [mailto:[address removed]] On Behalf Of Jon Seidel
Sent: Thursday, October 11,[masked]:52 AM
To: [address removed]
Subject: RE: [Berkeley-Access-Database-Users-Group] Major import problem...

 

Thanks, Cory… The complete process requires several steps (e.g., delete the old SQL import table; import the new data into a temp SQL table using DoCmd.TransferSpreadsheet with a custom import specification; add the processing date to the temp table; find/copy error records to an exception table; delete the error records from the import table; delete any records with the new transaction date from the production table; append the imported data from the temp table to the production table) <whew!>
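
Roughly, in VBA, that sequence looks like the sketch below. Every table, column, and file name is a stand-in, and I've shown DoCmd.TransferText, the variant that accepts a saved import specification for delimited text files:

Dim cn As ADODB.Connection
Set cn = CurrentProject.Connection
cn.Execute "IF OBJECT_ID('tmpPayroll','U') IS NOT NULL DROP TABLE tmpPayroll"  'delete the old import table
DoCmd.TransferText acImportDelim, "PayrollSpec", "tmpPayroll", "C:\Exports\payroll.txt", True  'import the new data
cn.Execute "ALTER TABLE tmpPayroll ADD ProcessDate datetime"  'add the processing date
cn.Execute "UPDATE tmpPayroll SET ProcessDate = GETDATE()"
cn.Execute "INSERT INTO PayrollErrors SELECT * FROM tmpPayroll WHERE Amount IS NULL"  'copy error records (the error test is a placeholder)
cn.Execute "DELETE FROM tmpPayroll WHERE Amount IS NULL"  'remove them from the import table
cn.Execute "DELETE FROM Payroll WHERE TranDate IN (SELECT DISTINCT TranDate FROM tmpPayroll)"  'clear any records with the new transaction date
cn.Execute "INSERT INTO Payroll SELECT * FROM tmpPayroll"  'append to production (real code would list the columns)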

 

I use this same approach for the other three (smaller) import files, and it works just fine.

 

I did notice that the SQL database was defined with the option to grow by only 1MB when needed (total size is about 30GB right now), so I increased that to a 5% growth factor, on the guess that doing several expansions during the import might cause enough of a delay to produce the problem.
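
For anyone who wants to make the same change, it's one T-SQL statement; the database and logical file names below are stand-ins (the logical name is listed in sys.database_files):

CurrentProject.Connection.Execute _
    "ALTER DATABASE PayrollApp MODIFY FILE (NAME = PayrollApp_Data, FILEGROWTH = 5%)"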

 

Cheers…jon

 

Jon Seidel, CMC®

Effective Decisions... Priceless!

 

From: [address removed] [mailto:[address removed]] On Behalf Of Cory Nott
Sent: Thursday, October 11,[masked]:48 AM
To: [address removed]
Subject: RE: [Berkeley-Access-Database-Users-Group] Major import problem...

 

Another thing you can try is linking the file and going through it record by record (automated, of course) and seeing where it breaks.
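
Something like this, say (file, spec, and table names are made up, and it assumes a front end where you can link text files):

Public Sub FindBadRow()
    'Link the text file, then walk it row by row until a read fails.
    Dim rs As DAO.Recordset, lRow As Long, v As Variant
    DoCmd.TransferText acLinkDelim, "PayrollSpec", "lnkPayroll", "C:\Exports\payroll.txt", True
    Set rs = CurrentDb.OpenRecordset("lnkPayroll", dbOpenForwardOnly)
    On Error GoTo Broke
    Do While Not rs.EOF
        lRow = lRow + 1
        v = rs.Fields(0).Value  'Touch the data so a corrupt row raises an error right here
        rs.MoveNext
    Loop
    Debug.Print "Read " & lRow & " rows with no error"
    rs.Close
    Exit Sub
Broke:
    Debug.Print "Failed at row " & lRow + 1 & ": " & Err.Description
End Sub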

 

Cory

 

 

From: [address removed] [mailto:[address removed]] On Behalf Of Jon Seidel
Sent: Thursday, October 11,[masked]:50 AM
To: [address removed]
Subject: [Berkeley-Access-Database-Users-Group] Major import problem...

 

I’ve got a problem that maybe someone can help me with.

 

Background

The application uses Access 2010 as a front end to a SQL Server 2008 back end. My app gets its information by importing data from another, purchased application that does the first chunk of work (payroll). We have to do imports because, although the other app also uses SQL Server, we are not allowed to get data directly from their database.

 

The payroll app exports data as tab-delimited text files [FYI, I really like this format because, unlike CSV, there is no confusion as to which fields are which], which are then imported into my app with custom code and validations/error reports.

 

Problem

There are four files that are imported each week; three of them work just fine. The fourth one (the largest, something like 20MB) hangs or winds up throwing a debug error. The other three files are only 2-4MB. I can’t see anything wrong with the code… the size is the only difference I can find.

 

Has anyone run into a situation like this and have any pointers to a solution? It’s a REAL problem for my client.

 

Thanks much…jon

 

Jon Seidel, CMC®

Effective Decisions... Priceless!

 




