english version "1.0" identify "wxyz" #: Copyright (c) 1995, 2002 by Wayne C. Gramlich. #, All rights reserved. #, #, Permission to use, copy, modify, distribute, and sell this software #, for any purpose is hereby granted without fee provided that the above #, copyright notice and this permission are retained. The author makes #, no representations about the suitability of this software for any purpose. #, It is provided "as is" without express or implied warranty. module chunk #: This module implements the {chunk} and {chunks} types for SVMS history #, files. import address character data_io differ file_name file_system format history in_stream integer logical out_stream misc project resources set string svms unsigned vector version define chunk_type #: {chunk} representation enumeration binary #: Raw binary bytes error #: Unknown type (error) line #: Characters followed by new-line range #: A bunch of {chunk} ranges generate equal, hash, print, unsigned_convert #: An allocated {chunk} is always a member of exactly one {version}.{chunks} #, list. A {chunk} chunk can be moved from one {version}.{chunks} list #, to another one, but care must be taken to ensure that the previous #, one is deleted. define chunk #: Bytes of data in file record data string #: String of data data_hash unsigned #: Hash for chunk offset unsigned #: Offset into {version}.{chunks} resources resources #: Parent {resources} object sort_key unsigned #: Key used for sorting {chunk}'s type chunk_type #: {line} or {binary} or {range} version version #: Version that {chunk} is part of generate address_get, allocate, erase, identical, print define chunk_range #: List of chunk names record count unsigned #: Number of chunks in sequence indirect logical #: {true}=>keep expanding offset unsigned #: First chunk in range resources resources #: Parent {resources} object version version #: {version} that range is part of generate address_get, allocate, erase, identical, print define chunk_ranges #: List of chunk ranges record list vector[chunk_range] #: List of {chunk_range} resources resources #: Parent {resources} object generate address_get, allocate, erase, identical, print define chunks #: List of chunks record list vector[chunk] #: List of {chunk} resources resources #: Parent {resources} object generate address_get, allocate, erase, identical, print #: {chunk} procedures: procedure xallocate@chunk takes version version returns chunk #: This procedure will allocate and return a new {chunk} object #, that refers to {version}. assert version !== ?? chunks :@= version.chunks assert chunks !== ?? size :@= chunks.size resources :@= version.resources assert resources !== ?? # Allocate {chunk} and its {data} string: chunk :@= chunk_allocate@(resources) erase@(chunk) data :@= string_allocate@(resources) trim@(data, 0) # Fill in the fields: chunk.data := data chunk.data_hash := hash@(data) chunk.offset := size chunk.resources := resources chunk.sort_key := 0xffffffff # Huge offset! chunk.type := error chunk.version := version # Stick it into the {version}.{chunks} list: append@(chunks, chunk) #debug_stream :@= resources.global.debug_stream #format@format1[address](debug_stream, # 'allocate@chunk()=>%X%\n\', chunk.address) return chunk procedure compare@chunk takes chunk1 chunk chunk2 chunk returns integer #: This procedure returns -1, 0, or 1 depending upon whether the #, {sort_key} field in {chunk1} is less than, equal to, or greater #, than the {sort_key} field in {chunk2}. Please note that this #, procedure only examines the {sort_key} field and nothing else; #, in particular, when this procedure returns a 0, it may not mean #, that the two chunks are equal in terms of their contents (i.e. #, {equal}@{chunk}() might not return {true}.) return compare@(chunk1.sort_key, chunk2.sort_key) procedure deallocate@chunk takes chunk chunk returns_nothing #: This procedure will deallocate {chunk} and make it available for #, subsequent reallocation. assert chunk !== ?? data :@= chunk.data assert data !== ?? resources :@= chunk.resources debug_stream :@= resources.global.debug_stream #format@format2[address, string](debug_stream, # 'deallocate@chunk(%X%=%ds%)\n\', chunk.address, chunk.data) remove@(chunk) string_deallocate@(resources, data) erase@(chunk) chunk.resources := resources chunk_deallocate@(resources, chunk) procedure equal@chunk takes chunk1 chunk chunk2 chunk returns logical #: This procedure will return {true} if {chunk1} is equal to {chunk2} #, and {false} otherwise. assert chunk1 !== ?? && chunk2 !== ?? if chunk1.type != chunk2.type return false if chunk1.data != chunk2.data return false return true procedure hash@chunk takes chunk chunk returns unsigned #: This procedure will return a hash value for {chunk}. assert chunk !== ?? assert chunk.data_hash = hash@(chunk.data) return chunk.data_hash procedure move@chunk takes chunk chunk new_version version returns_nothing #: This procedure will move {chunk} from {chunk}.{version}.{chunks} #, to {new_version}.{chunks}. assert new_version !== ?? assert chunk !== ?? old_version :@= chunk.version assert old_version !== ?? assert new_version !== old_version remove@(chunk) # Append {chunk} to the end of chunks :@= new_version.chunks new_offset :@= chunks.size chunk.version := new_version chunk.offset := new_offset append@(chunks, chunk) assert chunks[new_offset] == chunk procedure read@chunk takes data_in_stream data_in_stream version version returns chunk #: This procedure will read in a {chunk} object from {data_in_stream} #, and return it. assert version !== ?? chunk :@= xallocate@chunk(version) data :@= chunk.data tag_character :@= tag_read@(data_in_stream) if tag_character = "B"[0] chunk.type := binary bytes_read@(data_in_stream, data) else_if tag_character = "T"[0] chunk.type := line string_read@(data_in_stream, data) else_if tag_character = "R"[0] chunk.type := range string_read@(data_in_stream, data) else # This should not happen! assert false new_line_read@(data_in_stream) chunk.data_hash := hash@(data) return chunk procedure remove@chunk takes chunk chunk returns_nothing #: This procedure will remove {chunk} from {chunk}.{version}.{chunks}. # Remove {chunk} from the old version: assert chunk !== ?? version :@= chunk.version assert version !== ?? chunks :@= version.chunks size :@= chunks.size offset :@= chunk.offset assert chunks[offset] == chunk moved_chunk :@= chunks[size - 1] moved_chunk.offset := offset chunks[offset] := moved_chunk chunk.offset := 0xffffffff # Huge offset! trim@(chunks, size - 1) history :@= version.history assert history !== ?? history.ranges_invalid := true procedure share@chunk takes chunk chunk share_table set[chunk] returns chunk #: This procedure will return a {chunk} object whose contents are #, equal to {chunk}. {chunk} must be part of either the comments #, or data of {version}. assert chunk !== ?? assert chunk.data_hash = hash@(chunk.data) version :@= chunk.version debug_stream :@= chunk.resources.global.debug_stream #format@format2[address, string](debug_stream, # 'share@chunk(%X%=%ds%)\n\', chunk.address, chunk.data) if version == ?? #format@format2[address, string](debug_stream, # 'share@chunk(%X%=%ds%)\n\', chunk.address, chunk.data) assert false history :@= version.history assert history !== ?? history :@= version.history shared_chunk :@= share_table[chunk] if shared_chunk == ?? # Not in {share_table}: assert !(insert@(share_table, chunk)) shared_chunk := chunk #format@format2[address, string](debug_stream, # 'insert(%X%=%ds%)\n\', shared_chunk.address, shared_chunk.data) else_if shared_chunk !== chunk if shared_chunk.version > version # {shared_chunk} is currently older than {chunk}, #, move it to the earlier version: move@(shared_chunk, version) deallocate@(chunk) #format@format3[address, string, address](debug_stream, # 'share@chunk(%X%(=%ds%))=>%X%\n\', # chunk.address, chunk.data, shared_chunk.address) return shared_chunk procedure show_prefixed@chunk takes chunk chunk prefix string out_stream out_stream returns_nothing #: This procedure will output {chunk} to {out_stream} where each #, line is prefixed by {prefix}. switch chunk.type case line put@(prefix, out_stream) put@(chunk.data, out_stream) put@("\n\", out_stream) default assert false procedure write@chunk takes chunk chunk data_out_stream data_out_stream version version returns_nothing #: This procedure will write {chunk} to {data_out_stream}. assert chunk !== ?? assert version !== ?? assert chunk.version == version data :@= chunk.data assert hash@(chunk.data) = chunk.data_hash switch chunk.type case binary tag_write@(data_out_stream, "B"[0]) bytes_write@(data_out_stream, data) case line tag_write@(data_out_stream, "T"[0]) string_write@(data_out_stream, data) case range tag_write@(data_out_stream, "R"[0]) string_write@(data_out_stream, data) default assert false new_line_write@(data_out_stream) #: {chunk_range} procedures: procedure xallocate@chunk_range takes version version returns chunk_range #: This procedure will allocate a new {chunk_range} for {version}. assert version !== ?? assert version.history !== ?? resources :@= version.history.resources assert resources !== ?? chunk_range :@= chunk_range_allocate@(resources) erase@(chunk_range) chunk_range.resources := resources chunk_range.version := version #debug_stream :@= resources.global.debug_stream #format@format2[address, address](debug_stream, # '#allocate@chunk_range(%X%)=>%X%\n\', # version.address, chunk_range.address) return chunk_range procedure chunks_append@chunk_range takes chunk_range chunk_range chunks chunks returns_nothing #: This procedure will append a {chunk} object to {chunks} for #, each appropriate {chunk} in {chunk_range}. assert chunk_range !== ?? assert chunks !== ?? version :@= chunk_range.version assert version !== ?? version_chunks :@= version.chunks assert version_chunks !== ?? offset :@= chunk_range.offset size :@= chunk_range.count index :@= 0 loop while index < size chunk :@= version_chunks[offset + index] if chunk_range.indirect # Indirect range {chunk}: assert chunk.type = range data :@= chunk.data # Extract the total number of {chunk_ranges}: at :@= "@"[0] colon :@= ":"[0] exclaimation :@= "!"[0] period :@= "."[0] space :@= " "[0] pointer :@= 0 total :@= 0 loop character :@= data[pointer] pointer :+= 1 while is_digit@(character) # 48 = unsigned_convert@("0"[0]) total := total * 10 + unsigned_convert@(character) - 48 assert character = period # Extract the indirect {chunk_range}'s: resources :@= chunks.resources versions :@= version.history.versions new_chunk_ranges :@= xallocate@chunk_ranges(resources) count :@= 0 loop while count < total character := data[pointer] pointer :+= 1 assert character = space new_chunk_range :@= xallocate@chunk_range(version) character := data[pointer] pointer :+= 1 new_chunk_range.indirect := character = "@"[0] number :@= 0 loop character := data[pointer] pointer :+= 1 while is_digit@(character) number := number * 10 + unsigned_convert@(character) - 48 assert character = colon new_chunk_range.version := versions[number] number := 0 loop character := data[pointer] pointer :+= 1 while is_digit@(character) number := number * 10 + unsigned_convert@(character) - 48 assert character = exclaimation new_chunk_range.offset := number number := 0 loop character := data[pointer] pointer :+= 1 while is_digit@(character) number := number * 10 + unsigned_convert@(character) - 48 assert character = period new_chunk_range.count := number append@(new_chunk_ranges.list, new_chunk_range) count :+= 1 chunks_convert@(new_chunk_ranges, chunks) deallocate@(new_chunk_ranges) else # Direct {chunk}: append@(chunks, chunk) index :+= 1 procedure compare@chunk_range takes chunk_range1 chunk_range chunk_range2 chunk_range returns integer #: This procedure will return -1, 0, or 1 depending upon whether #, {chunk_range1} is less than, equal to, or greater than #, {chunk_range2}. zero :@= integer_convert@(0) result :@= compare@(chunk_range1.version, chunk_range2.version) if result = zero result := compare@(chunk_range1.offset, chunk_range2.offset) if result = zero result := compare@(chunk_range1.count, chunk_range2.count) if result = zero if chunk_range1.indirect if chunk_range2.indirect result := zero else result := integer_convert@(1) else if chunk_range2.indirect result := -integer_convert@(1) else result := zero return result procedure deallocate@chunk_range takes chunk_range chunk_range returns_nothing #: This procedure will deallocate {chunk_range} for subsequent #, reallocation. #debug_stream :@= chunk_range.resources.global.debug_stream #format@format1[address](debug_stream, # '#deallocate@chunk_range(%X%)\n\', chunk_range.address) chunk_range_deallocate@(chunk_range.resources, chunk_range) procedure equal@chunk_range takes chunk_range1 chunk_range chunk_range2 chunk_range returns logical #: This procedure will return {true} if {chunk_range1} is equal to #, {chunk_range2}. return compare@(chunk_range1, chunk_range2) = integer_convert@(0) procedure greater_than@chunk_range takes chunk_range1 chunk_range chunk_range2 chunk_range returns logical #: This procedure will return {true} if {chunk_range1} is greater than #, {chunk_range2}. return compare@(chunk_range1, chunk_range2) > integer_convert@(0) procedure less_than@chunk_range takes chunk_range1 chunk_range chunk_range2 chunk_range returns logical #: This procedure will return {true} if {chunk_range1} is less than #, {chunk_range2}. return compare@(chunk_range1, chunk_range2) > integer_convert@(0) procedure read@chunk_range takes data_in_stream data_in_stream history history returns chunk_range #: This procedure will read in and return a {chunk_range} object from #, {data_in_stream} that was written by {write}@{chank_range}(). #, {history} is used to access the version list. space_read@(data_in_stream) character :@= character_read@(data_in_stream) version_number :@= unsigned_read_helper@(data_in_stream, ":"[0]) offset :@= unsigned_read_helper@(data_in_stream, "!"[0]) count :@= unsigned_read_helper@(data_in_stream, "."[0]) version :@= history.versions[version_number] assert version !== ?? chunk_range :@= xallocate@chunk_range(version) chunk_range.indirect := character = "@"[0] chunk_range.offset := offset chunk_range.count := count return chunk_range procedure write@chunk_range takes chunk_range chunk_range data_out_stream data_out_stream returns_nothing #: This procedure will write {chunk_range} out to {data_out_stream}. space_write@(data_out_stream) character_write@(data_out_stream, chunk_range.indirect ? "@"[0] : "|"[0]) unsigned_write_helper@(data_out_stream, chunk_range.version.offset) character_write@(data_out_stream, ":"[0]) unsigned_write_helper@(data_out_stream, chunk_range.offset) character_write@(data_out_stream, "!"[0]) unsigned_write_helper@(data_out_stream, chunk_range.count) character_write@(data_out_stream, "."[0]) #: {chunk_ranges} procedures: procedure xallocate@chunk_ranges takes resources resources returns chunk_ranges #: This procedure will allocate a new {chunk_ranges} object from #, {resources}. chunk_ranges :@= chunk_ranges_allocate@(resources) list :@= chunk_ranges.list erase@(chunk_ranges) if list == ?? list :@= allocate@vector[chunk_range]() truncate@(list, 0) chunk_ranges.list := list chunk_ranges.resources := resources #debug_stream :@= chunk_ranges.resources.global.debug_stream #format@format1[address](debug_stream, # '@allocate@chunk_ranges()=>%X%\n\', chunk_ranges.address) return chunk_ranges procedure append@chunk_ranges takes chunk_ranges chunk_ranges chunk chunk indirect logical returns_nothing #: This procedure will append into {chunk_ranges} creating #, a new {chunk_range} object to contain it if needed. assert chunk_ranges !== ?? assert chunk !== ?? list :@= chunk_ranges.list size :@= list.size resources :@= chunk_ranges.resources #debug_stream :@= resources.global.debug_stream #format@format4[address, address, logical, unsigned](debug_stream, # 'append@chunk_ranges(%X%, %X%, %l%) size=%d%\n\', # chunk_ranges.address, chunk.address, indirect, size) chunk_range:: chunk_range := ?? if size = 0 version :@= chunk.version assert version !== ?? chunk_range := xallocate@chunk_range(version) chunk_range.version := version chunk_range.offset := chunk.offset chunk_range.count := 1 chunk_range.indirect := indirect append@(list, chunk_range) #FIXME: return else chunk_range := list[size - 1] #format@format3[unsigned, unsigned, logical](debug_stream, # 'chunk_range:{offset: %d% count: %d% indirect:%l%}\n\', # chunk_range.offset, chunk_range.count, chunk_range.indirect) #format@format1[unsigned](debug_stream, # 'chunk.offset: %d%}\n\', chunk.offset) if chunk_range.version !== chunk.version || chunk_range.offset + chunk_range.count != chunk.offset || chunk_range.indirect != indirect version :@= chunk.version assert version !== ?? chunk_range := xallocate@chunk_range(version) chunk_range.version := version chunk_range.offset := chunk.offset chunk_range.count := 1 chunk_range.indirect := indirect append@(list, chunk_range) #FIXME: return else chunk_range.count :+= 1 #format@format4[address, address, logical, unsigned](debug_stream, # 'append@chunk_ranges(%X%, %X%, %l%) size=%d% returns\n\', # chunk_ranges.address, chunk.address, indirect, list.size) procedure chunks_convert@chunk_ranges takes chunk_ranges chunk_ranges chunks chunks returns_nothing #: This procedure will take a {chunks_ranges} object and expand #, it so that referenced chunk is appended to {chunks} in the #, proper order. assert chunk_ranges !== ?? assert chunks !== ?? list :@= chunk_ranges.list size :@= list.size index :@= 0 loop while index < size chunk_range :@= list[index] chunks_append@(chunk_range, chunks) index :+= 1 procedure compress@chunk_ranges takes chunk_ranges chunk_ranges version version share_table set[chunk] returns_nothing #: This procedure will compress {chunk_ranges} down to a single #, {chunk_range} object and return it. Any new indirect strings #, are added to {version}. # This is a fun algorithm that goes off and attempts to locate #, common sequences of lines. Unfortunately, the algorithm is #, a bit opaque. assert chunk_ranges !== ?? assert share_table !== ?? at_sign :@= "@"[0] colon :@= ":"[0] dot :@= "."[0] exclamation_point :@= "!"[0] space :@= " "[0] vertical_bar :@= "|"[0] resources :@= chunk_ranges.resources list :@= chunk_ranges.list #debug_stream :@= resources.global.debug_stream #format@format3[address, address, unsigned](debug_stream, # 'compress@chunk_ranges(%X%, %X%) size=%d%\n\', # chunk_ranges.address, version.address, list.size) temporary_chunk_ranges :@= xallocate@chunk_ranges(resources) temporary_list :@= temporary_chunk_ranges.list # Compress until there is one {chunk_range}: loop size :@= list.size while size > 1 # This is the blocking loop. A block of {chunk_range} objects is #, identified that is either 8 or 9 long (forshortened at the end.) #, The length of 8 or 9 is picked by comparing the 8th and 9th #, {chunk_range} objects. If they are "less than", a sequence of 8 #, is picked; otherwise 9 is picked. For large files that have #, that have lots of lines in common, this will eventually cause #, them to resynchronize: start :@= 0 end :@= 0 loop while end < size # Try to grab at least 8 {chunk_range} objects: end := start + 8 if end > size end := size else_if end < size if list[end - 1] < list[end] # Grab 9: end :+= 1 # OK. Now assemble a string the represents the sequence: chunk :@= xallocate@chunk(version) chunk.type := range data :@= chunk.data trim@(data, 0) number_append@(data, end - start) character_append@(data, dot) index :@= start loop while index < end chunk_range :@= list[index] character_append@(data, space) character_append@(data, chunk_range.indirect ? at_sign : vertical_bar) number_append@(data, chunk_range.version.offset) character_append@(data, colon) number_append@(data, chunk_range.offset) character_append@(data, exclamation_point) number_append@(data, chunk_range.count) character_append@(data, dot) index :+= 1 #format@format1[string](debug_stream, 'data=%ds%\n\', data) # Now get a chunk that represents the sequence: chunk.data_hash := hash@(data) shared_chunk :@= share@(chunk, share_table) append@(temporary_chunk_ranges, shared_chunk, true) start := end # Deallocate the old {chunk_range} objects: index :@= 0 loop while index < size chunk_range :@= list[index] deallocate@(chunk_range) index :+= 1 #format@format1[unsigned](debug_stream, # 'list.size(before)=%d%\n\', list.size) truncate@(list, 0) # Update the list with the new {chunk_range} objects: vector_append@(list, temporary_list) truncate@(temporary_list, 0) #format@format1[unsigned](debug_stream, # 'list.size(after)=%d%\n\', list.size) # We don't need {temporary_chunk_ranges} any more: deallocate@(temporary_chunk_ranges) #format@format3[address, address, unsigned](debug_stream, # 'compress@chunk_ranges(%X%, %X%) size=%d% returns\n\', # chunk_ranges.address, version.address, list.size) procedure deallocate@chunk_ranges takes chunk_ranges chunk_ranges returns_nothing #: This procedure will deallocate {chunk_ranges} for subsequent #, reallocation. list :@= chunk_ranges.list size :@= list.size #debug_stream :@= chunk_ranges.resources.global.debug_stream #format@format2[address, unsigned](debug_stream, # '@deallocate@chunk_ranges(%X%) size=%d%\n\', # chunk_ranges.address, size) index :@= 0 loop while index < size chunk_range :@= list[index] deallocate@(chunk_range) index :+= 1 chunk_ranges_deallocate@(chunk_ranges.resources, chunk_ranges) procedure read@chunk_ranges takes data_in_stream data_in_stream history history returns chunk_ranges #: This procedure will read in a {chunk_ranges} object that was #, originally writen by {write}@{chunk_ranges}(). {history} is #, used to access the {version} table. size :@= unsigned_read@(data_in_stream) chunk_ranges :@= xallocate@chunk_ranges(data_in_stream.resources) list :@= chunk_ranges.list assert list !== ?? index :@= 0 loop while index < size chunk_range :@= read@chunk_range(data_in_stream, history) append@(list, chunk_range) index :+= 1 return chunk_ranges procedure write@chunk_ranges takes chunk_ranges chunk_ranges data_out_stream data_out_stream returns_nothing #: This procedure will write {chunk_ranges} to {data_out_stream}. list :@= chunk_ranges.list size :@= list.size unsigned_write@(data_out_stream, size) index :@= 0 loop while index < size chunk_range :@= list[index] write@(chunk_range, data_out_stream) index :+= 1 #: {chunks} procedures: procedure xallocate@chunks takes resources resources returns chunks #: This procedure will allocate a new {chunks} object from {resources}. chunks :@= chunks_allocate@(resources) list :@= chunks.list erase@(chunks) chunks.resources := resources if list == ?? list := allocate@vector[chunk]() chunks.list := list #debug_stream :@= resources.global.debug_stream #format@format1[address](debug_stream, # 'allocate@chunks()=>%X%\n\', chunks.address) return chunks procedure append@chunks takes chunks chunks chunk chunk returns_nothing #: This procedure will append {chunk} to {chunks}. #debug_stream :@= chunk.resources.global.debug_stream #format@format3[address, address, string](debug_stream, # 'append@chunks(%X%, %X%=%ds%)\n\', # chunks.address, chunk.address, chunk.data) assert chunks !== ?? assert chunk !== ?? assert chunk.data_hash = hash@(chunk.data) append@(chunks.list, chunk) procedure binary_extract@chunks takes contents string version version returns chunks #: This procedure will treat {contents} as a sequence of raw bianry #, bytes and return a {chunks} object that contains the data. resources :@= version.resources chunks :@= xallocate@chunks(resources) history :@= version.history size :@= contents.size chunk :@= xallocate@chunk(version) data :@= chunk.data trim@(data, 0) index :@= 0 count :@= 0 byte :@= "0"[0] loop while index < size last_byte :@= byte byte :@= contents[index] character_append@(data, byte) count :+= 1 # If the bytes are essentially offset from a previous version #, of the file contents, we would eventually like to synchronize #, with the previous contents. The algorithm that attempts to #, accomplish this is fairly subtle and difficult to articulate. #, The algorithm causes chunks to be either 32 or 33 bytes long. #, If the chunk is 33 bytes long, the last byte is less than the #, second to last byte; otherwise, the chunk is will only be 32 #, bytes long. If the matching sequence in two versions of a #, file is long enough and the bytes are distributed more or #, less randomly, over time the algorithm will gravitate towards #, chunks that start with slightly larger values. Eventually, #, the chunks will synchronize and additional compression will #, be realized. I'm sorry if this description is clear as mud! if count >= 32 && last_byte >= byte || count >= 33 chunk.type := binary chunk.data_hash := hash@(data) append@(chunks, chunk) chunk := xallocate@chunk(version) count := 0 index :+= 1 # Deal with lines with no terminating new-line: if data.size = 0 deallocate@(chunk) else chunk.type := binary chunk.data_hash := hash@(data) append@(chunks, chunk) return chunks procedure chunk_ranges_convert@chunks takes chunks chunks version version share_table set[chunk] returns chunk_ranges #: This procedure will convert {chunks} into a corresponding #, {chunk_ranges} object. Any needed indirect {chunk}'s will #, be added to {version}. resources :@= chunks.resources list :@= chunks.list size :@= list.size #debug_stream :@= resources.global.debug_stream #format@format3[address, address, unsigned](debug_stream, # 'chunk_ranges_convert@chunks(%X%, %X%) size=%d%\n\', # chunks.address, version.address, size) chunk_ranges :@= xallocate@chunk_ranges(resources) index :@= 0 loop while index < size chunk :@= list[index] append@(chunk_ranges, chunk, false) index :+= 1 compress@(chunk_ranges, version, share_table) #format@format2[address, address](debug_stream, # 'chunk_ranges_convert@chunks(%X%, %X%) returns\n\', # chunks.address, version.address) return chunk_ranges procedure deallocate@chunks takes chunks chunks returns_nothing #: This procedure will deallocate {chunks} and make it available for #, subsequent reallocation. If {chunks}.{size} is non-zero, {chunks} #, must be a {version}.{chunks} object. list :@= chunks.list size :@= list.size #debug_stream :@= chunks.resources.global.debug_stream #format@format2[address, unsigned](debug_stream, # 'deallocate@chunks(%X%) size=%d%\n\', chunks.address, size) assert chunks !== ?? loop while list.size != 0 chunk :@= list[0] deallocate@(chunk) chunks_deallocate@(chunks.resources, chunks) #format@format1[address](debug_stream, # 'deallocate@chunks(%X%) returns\n\', chunks.address) procedure lines_extract@chunks takes chunks chunks version version contents string returns_nothing #: This procedure will treat {contents} as a sequence of lines and #, generate a sequence of shared {chunk} objects and append them to #, the end of {chunks}. Any new shared {chunk} objects will be part #, of {version}. {global} provides a few useful global values. assert chunks !== ?? assert version !== ?? assert contents !== ?? history :@= version.history assert history !==?? project :@= version.history.project_file.project assert project !== ?? global :@= project.global line_feed :@= "\n\"[0] unix_mode:: logical := true #FIXME: fetch from {global} chunk :@= xallocate@chunk(version) data :@= chunk.data trim@(data, 0) size :@= contents.size index :@= 0 if unix_mode loop while index < size character :@= contents[index] if character = line_feed # Line separator found: chunk.type := line chunk.data_hash := hash@(data) append@(chunks, chunk) chunk := xallocate@chunk(version) data := chunk.data trim@(data, 0) else character_append@(data, character) index :+= 1 else carriage_return :@= "\015\"[0] #FIXME: should be "\r\"[0] loop while index < size character :@= contents[index] if character = carriage_return index :+= 1 character := contents[index] if index < size && character = line_feed # Line separator found: chunk.type := line chunk.data_hash := hash@(data) append@(chunks, chunk) chunk := xallocate@chunk(version) data := chunk.data else character_append@(data, carriage_return) character_append@(data, character) else character_append@(data, character) index :+= 1 # Deal with lines with no terminating new-line: if data.size = 0 deallocate@(chunk) else chunk.type := binary chunk.data_hash := hash@(data) append@(chunks, chunk) procedure fetch1@chunks takes chunks chunks index unsigned returns chunk #: This procedure will return the {index}'th {chunk} object in {chunks}. assert chunks !== ?? return chunks.list[index] procedure is_sorted@chunks takes chunks chunks version version returns logical #: This procedure will validate that each {chunk} that is in {version} #, in {chunks} is properly ordered by the {sort_key} field. {true} #, is returned if everything is properly sorted and {false} otherwise. assert chunks !== ?? list :@= chunks.list size :@= list.size if size = 0 return true previous_chunk :@= list[0] assert previous_chunk.version == version index :@= 1 loop while index < size next_chunk :@= list[index] assert next_chunk !== ?? assert next_chunk.version == version if previous_chunk.sort_key >= next_chunk.sort_key # Out of sort: return false previous_chunk := next_chunk index :+= 1 return true procedure ranges_remove@chunks takes chunks chunks version version returns logical #: This procedure will remove any range {chunk}'s from {chunks} #, that are part of {version}. {true} is removed if at least #, one {chunk} is removed. assert chunks !== ?? assert version !== ?? list :@= chunks.list size :@= list.size removed :@= 0 index :@= 0 loop while index < size chunk :@= list[index] if chunk.type = range deallocate@(chunk) # Remove reduces size by one. size :-= 1 removed :+= 1 else index :+= 1 verify@(chunks, version) return removed != 0 procedure read@chunks takes chunks chunks data_in_stream data_in_stream version version returns_nothing #: This procedure will read in a sequence of chunks from {in_stream} assert chunks !== ?? assert version !== ?? tag_check@(data_in_stream, "L"[0]) size :@= unsigned_read@(data_in_stream) new_line_read@(data_in_stream) list :@= chunks.list truncate@(list, 0) index :@= 0 loop while index < size chunk :@= read@chunk(data_in_stream, version) index :+= 1 procedure show@chunks takes chunks chunks out_stream out_stream returns_nothing #: This procedure will output {chunks} ot {out_stream}. Each {chunk} in #, {chunks} must be of type {line}. show_prefixed@(chunks, "", out_stream) procedure show_prefixed@chunks takes chunks chunks prefix string out_stream out_stream returns_nothing #: This procedure will output {chunks} to {out_stream} where each #, line is prefixed by {prefix}. list :@= chunks.list size :@= list.size index :@= 0 loop while index < size chunk :@= list[index] show_prefixed@(chunk, prefix, out_stream) index :+= 1 procedure share@chunks takes chunks chunks share_table set[chunk] returns_nothing #: This procedure will force each {chunk} in {chunks} to be sharable #, via {share_table}. assert chunks !== ?? list :@= chunks.list size :@= list.size index :@= 0 loop while index < size chunk :@= list[index] list[index] := share@(chunk, share_table) #verify@(chunks, ??) index :+= 1 procedure size_get@chunks takes chunks chunks returns unsigned #: This procedure will return the size of {chunks}. assert chunks !== ?? return chunks.list.size procedure sort@chunks takes chunks chunks version version returns_nothing #: This procedure will sort {chunks} based on the {sort_key} field #, in each {chunk}. Each {chunk} in {chunks} must be a member of #, {version}. if !is_sorted@(chunks, version) list :@= chunks.list sort@(list) # Reassign the offsets, since everything has been reordered: size :@= list.size index :@= 0 loop while index < size chunk :@= list[index] chunk.offset := index index :+= 1 procedure sort_key_bind@chunks takes chunks chunks start_offset unsigned version version returns unsigned #: This procedure will take each {chunk} in {chunks} that is a member #, {version} and assign its {sort_key} field a value starting at #, at {start_offset} and incrementing by one. The last assigned #, value (plus one) is returned. This procedure requires that #, {sort_key_reset}@{chunks}() be called on {chunks} beforehand. assert chunks !== ?? assert version !== ?? assert start_offset != 0 offset :@= start_offset list :@= chunks.list size :@= list.size #debug_stream :@= chunks.resources.global.debug_stream #format@format2[address, unsigned](debug_stream, # 'sort_key_bind@chunks(%X%) size=%d%\n\', chunks.address, size) index :@= 0 loop while index < size chunk :@= list[index] if chunk.version == version && chunk.sort_key = 0 chunk.sort_key := offset #format@format2[string, unsigned](debug_stream, # '%ds%=%d%\n\', chunk.data, offset) offset :+= 1 index :+= 1 return offset procedure sort_key_reset@chunks takes chunks chunks version version returns_nothing #: This procedure will reset the {sort_key} field for each {chunk} in #, {chunks} that is a member of {version} to zero. assert chunks !== ?? assert version !== ?? list :@= chunks.list size :@= list.size index :@= 0 loop while index < size chunk :@= list[index] if chunk.version == version chunk.sort_key := 0 index :+= 1 procedure store1@chunks takes chunks chunks index unsigned chunk chunk returns_nothing #: This procedure will store {chunk} into the {index}'th slot in {chunks}. assert chunks !== ?? chunks.list[index] := chunk procedure string_append@chunks takes chunks chunks text string returns_nothing #: This procedure will take the contents of {chunks} and append them #, to the end of {text}. list :@= chunks.list size :@= list.size new_line :@= "\n\" index :@= 0 loop while index < size chunk :@= list[index] string_append@(text, chunk.data) switch chunk.type case binary #FIXME: do_nothing chunk := chunk case line string_append@(text, new_line) default assert false index :+= 1 procedure trim@chunks takes chunks chunks size unsigned returns_nothing #: This procedure will trim {chunks} to have a size of {size} #debug_stream :@= chunks.resources.global.debug_stream #format@format2[address, unsigned](debug_stream, # 'trim@chunks(%X%, %d%)\n\', chunks.address, size) assert chunks !== ?? truncate@(chunks.list, size) procedure unshare@chunks takes chunks chunks share_table set[chunk] returns_nothing #: This procedure will ensure that each {chunk} in {chunks} is no #, longer in {share_table}. debug_stream :@= chunks.resources.global.debug_stream #format@format1[address](debug_stream, # "unshare@chunks(%X%)\n\", chunks.address) assert chunks !== ?? list :@= chunks.list size :@= list.size #format@format1[unsigned](debug_stream, "size: %d%\n\", size) index :@= 0 loop while index < size chunk :@= list[index] #format@format3[unsigned, address, string](debug_stream, # "unshare@chunks[%d%]: %X% %ds%\n\", # index, chunk.address, chunk.data) assert chunk !== ?? if delete@(share_table, chunk) #format@format1[string](debug_stream, # "Delete failed for %ds%\n\", chunk.data) assert false index :+= 1 procedure verify@chunks takes chunks chunks version version returns_nothing #: This procedure will verify that each {chunk} in {chunks} matches #, {version} (if {version} is no equal to ??@{version}) and has the #, correct offset. A fatal assertion error occurs if any problem is #, encountered. This procedure is used for debugging. assert chunks !== ?? list :@= chunks.list size :@= list.size index :@= 0 loop while index < size chunk :@= list[index] assert chunk !== ?? chunk_version :@= chunk.version assert chunk_version !== ?? assert version == ?? || chunk_version == version assert chunk_version.chunks.list[chunk.offset] == chunk index :+= 1 procedure write@chunks takes chunks chunks data_out_stream data_out_stream version version returns_nothing #: This procedure will write {chunks} to {data_out_stream}. assert chunks !== ?? list :@= chunks.list assert list !== ?? size :@= list.size assert version !== ?? #debug_stream :@= chunks.resources.global.debug_stream #format@format2[address, unsigned](debug_stream, # 'write@chunks(%X%) size=%d%\n\', chunks.address, size) tag_write@(data_out_stream, "L"[0]) unsigned_write@(data_out_stream, size) new_line_write@(data_out_stream) index :@= 0 loop while index < size chunk :@= list[index] write@(chunk, data_out_stream, version) index :+= 1